{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据预处理-基于Python"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 一、数据框的查找"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "      <th>pop</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2000</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2001</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2002</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2001</td>\n",
       "      <td>2.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2002</td>\n",
       "      <td>2.9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    state  year  pop\n",
       "0    Ohio  2000  1.5\n",
       "1    Ohio  2001  1.7\n",
       "2    Ohio  2002  3.6\n",
       "3  Nevada  2001  2.4\n",
       "4  Nevada  2002  2.9"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],\n",
    "        'year':[2000, 2001, 2002, 2001, 2002, 2003],\n",
    "        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}\n",
    "df = pd.DataFrame(data) # 生成数据框\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "      <th>pop</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2000</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2001</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2002</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2001</td>\n",
       "      <td>2.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2002</td>\n",
       "      <td>2.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2003</td>\n",
       "      <td>3.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    state  year  pop\n",
       "0    Ohio  2000  1.5\n",
       "1    Ohio  2001  1.7\n",
       "2    Ohio  2002  3.6\n",
       "3  Nevada  2001  2.4\n",
       "4  Nevada  2002  2.9\n",
       "5  Nevada  2003  3.2"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes #查看每列变量的数据类型\n",
    "df['state']=df['state'].astype(\"category\") # 转换数据类型为类别类型或名义变量或因子类型。\n",
    "df['year'] = pd.to_datetime(df['year'].apply(str),format=\"%Y-%m-%d\").dt.year #首先转换为字符串，然后只让显示年份。\n",
    "df.dtypes\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['pop', 'state', 'year'], dtype='object')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns # 查看列变量名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(6, 3)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape # 数据的行和列数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Ohio, Nevada]\n",
       "Categories (2, object): [Ohio, Nevada]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['state'].unique() # 查看某一列的取值种类, 并且可以找出错乱的字符"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio      3\n",
       "Nevada    3\n",
       "Name: state, dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['state'].value_counts() #查看某列每个取值出现的次数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pop</th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "      <th>GDP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.5</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.7</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2001</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.6</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2002</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.4</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2001</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.9</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2002</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pop   state  year  GDP\n",
       "0  1.5    Ohio  2000    1\n",
       "1  1.7    Ohio  2001    2\n",
       "2  3.6    Ohio  2002    3\n",
       "3  2.4  Nevada  2001    4\n",
       "4  2.9  Nevada  2002    5"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "GDP=np.arange(df.shape[0])+1 # 添加新的变量\n",
    "df.insert(3,'GDP',GDP)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pop</th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "      <th>GDP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.5</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.7</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2001</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3.6</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2002</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>2.4</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2001</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>2.9</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2002</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pop   state  year  GDP\n",
       "a  1.5    Ohio  2000    1\n",
       "b  1.7    Ohio  2001    2\n",
       "c  3.6    Ohio  2002    3\n",
       "d  2.4  Nevada  2001    4\n",
       "e  2.9  Nevada  2002    5"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index=['a','b','c','d','e','f'] # 定义新的索引\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>GDP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1970-01-01 00:00:00.000002000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1970-01-01 00:00:00.000002001</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           year  GDP\n",
       "a 1970-01-01 00:00:00.000002000    1\n",
       "b 1970-01-01 00:00:00.000002001    2"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['a'] # location是位置的意思，即通过索引来查看行\n",
    "df.iloc[0:2] # \"i\"是integer的缩写，写出行数来查看。其实等同于直接df[0:2]\n",
    "df.loc[['a','b'],['year','GDP']] # 查看指定的行和列\n",
    "#df.iloc[[0,2],[1,3]] # 指定数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pop</th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "      <th>GDP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.5</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.7</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002001</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3.6</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002002</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pop state                          year  GDP\n",
       "a  1.5  Ohio 1970-01-01 00:00:00.000002000    1\n",
       "b  1.7  Ohio 1970-01-01 00:00:00.000002001    2\n",
       "c  3.6  Ohio 1970-01-01 00:00:00.000002002    3"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['state']==\"Ohio\"] # 查看满足条件的所有行"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 二、数据框修改"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>Nevada</td>\n",
       "      <td>1970-01-01 00:00:00.000002001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>Nevada</td>\n",
       "      <td>1970-01-01 00:00:00.000002002</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    state                          year\n",
       "a    Ohio 1970-01-01 00:00:00.000002000\n",
       "b    Ohio 1970-01-01 00:00:00.000002001\n",
       "c    Ohio 1970-01-01 00:00:00.000002002\n",
       "d  Nevada 1970-01-01 00:00:00.000002001\n",
       "e  Nevada 1970-01-01 00:00:00.000002002"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df=df.drop(['pop','GDP'],axis=1) # 丢弃列数据\n",
    "new_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pop</th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "      <th>GDP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.5</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.7</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002001</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3.6</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1970-01-01 00:00:00.000002002</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>2.4</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>1970-01-01 00:00:00.000002001</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>2.9</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>1970-01-01 00:00:00.000002002</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pop   state                          year  GDP\n",
       "a  1.5    Ohio 1970-01-01 00:00:00.000002000    1\n",
       "b  1.7    Ohio 1970-01-01 00:00:00.000002001   10\n",
       "c  3.6    Ohio 1970-01-01 00:00:00.000002002    3\n",
       "d  2.4  Nevada 1970-01-01 00:00:00.000002001    4\n",
       "e  2.9  Nevada 1970-01-01 00:00:00.000002002    5"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[\"b\",\"GDP\"]=10 # 赋值\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   one  two  three\n",
      "a    0    1      2\n",
      "b    3    4      5\n",
      "c    6    7      8\n"
     ]
    }
   ],
   "source": [
    "f = lambda x : x.max() - x.min()   \n",
    "a = np.arange(9).reshape(3,3)    \n",
    "data = pd.DataFrame(a,index=[\"a\",\"b\",\"c\"],columns=[\"one\",\"two\",\"three\"])   \n",
    "print(data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one      6\n",
      "two      6\n",
      "three    6\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(data.apply(f)) # 选择列中最大最小值之差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    2\n",
      "b    2\n",
      "c    2\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(data.apply(f,axis=1)) # 选择行中最大最小值之差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one       9\n",
      "two      12\n",
      "three    15\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(data.sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one      3.0\n",
      "two      4.0\n",
      "three    5.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(data.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 三、缺失值处理\n",
    "在对缺失数据进行处理前，了解数据缺失的机制和形式是十分必要的。将数据集中不含缺失值的变量称为完全变量，数据集中含有缺失值的变量称为不完全变量。而从缺失的分布来将缺失可以分为完全随机缺失，随机缺失和完全非随机缺失。\n",
    "* 完全随机缺失（missing completely at random,MCAR）：指的是数据的缺失是完全随机的，不依赖于任何不完全变量或完全变量，不影响样本的无偏性，如家庭地址缺失；\n",
    "* 随机缺失(missing at random,MAR)：指的是数据的缺失不是完全随机的，即该类数据的缺失依赖于其他完全变量，如财务数据缺失情况与企业的大小有关；\n",
    "* 非随机缺失(missing not at random,MNAR)：指的是数据的缺失与不完全变量自身的取值有关，如高收入人群不原意提供家庭收入；\n",
    "\n",
    "对于随机缺失和非随机缺失，直接删除记录是不合适的。随机缺失可以通过已知变量对缺失值进行估计，而非随机缺失的非随机性还没有很好的解决办法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>b</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>c</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  three  four\n",
       "a    0    1      2  10.0\n",
       "b    3    4      5   NaN\n",
       "c    6    7      8  20.0"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[\"four\"]=[10,np.nan,20]\n",
    "data.head()\n",
    "#data.isnull() # 查看缺失值位置"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>b</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>c</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  three  four\n",
       "a    0    1      2  10.0\n",
       "b    3    4      5   NaN\n",
       "c    6    7      8  20.0"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(how='any') # 去掉存在缺失值所有行\n",
    "data.dropna(how=\"all\")  # 去掉都是缺失值所在的行\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#统计重复记录数\n",
    "data.duplicated().sum()\n",
    "data.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>b</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>c</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  three  four\n",
       "a    0    1      2  10.0\n",
       "b    3    4      5  15.0\n",
       "c    6    7      8  20.0"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.fillna(100)\n",
    "data.fillna(data.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   one  two  three  four\n",
      "a    0    1      2  10.0\n",
      "b    3    4      5  15.0\n",
      "c    6    7      8  20.0\n"
     ]
    }
   ],
   "source": [
    "print(data.fillna({'one':1,'two':2,'four':data['four'].mean()})) # 定义字典以不同形式填充每列缺失值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### 四、异常值识别与处理\n",
    "模型通常是对整体样本数据结构的一种表达方式，这种表达方式通常抓住的是整体样本一般性的性质，而那些在这些性质上表现完全与整体样本不一致的点，我们就称其为异常点。一般异常值的检测方法有基于统计的方法，基于聚类的方法，以及一些专门检测异常值的方法等。\n",
    "\n",
    "常用的异常值处理方式包括：\n",
    "* 删除含有异常值的记录：直接将含有异常值的记录删除；\n",
    "* 视为缺失值：将异常值视为缺失值，利用缺失值处理的方法进行处理；\n",
    "* 平均值修正：可用前后两个观测值的平均值修正该异常值；\n",
    "* 不处理：直接在具有异常值的数据集上进行数据挖掘；"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 20 entries, 0 to 19\n",
      "Data columns (total 5 columns):\n",
      "num1    20 non-null int32\n",
      "num2    20 non-null int32\n",
      "num3    20 non-null int32\n",
      "num4    20 non-null int32\n",
      "num5    20 non-null int32\n",
      "dtypes: int32(5)\n",
      "memory usage: 528.0 bytes\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "a=np.arange(100).reshape(20,5)\n",
    "df1=pd.DataFrame(a, columns=[\"num1\",\"num2\",\"num3\",\"num4\",\"num5\"])\n",
    "df1.info() # or data.describe() 对数据整体进行描述"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x2635d34bf28>"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAE2hJREFUeJzt3X+M5HV9x/HnW8E7yhER0e0JF5foNeH8hbKhNqbJrlZFNCKttFijqNSTBi7as+mdmFStIbmrHiSihd4F4pmqJ/FHQUGE0p0YU1GB8vu0nnLAyhX8CbcoVw7e/WO/OCMuN7P7/c7O7Gefj2Sy3/nMZ77znvfNvPZ7n50fkZlIksr1lEEXIEnqL4Nekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVLiDBl0AwJFHHpmjo6ODLoOHHnqIQw89dNBlDAV70WYv2uxF2zD04oYbbvhZZj6r27yhCPrR0VGuv/76QZdBq9VifHx80GUMBXvRZi/a7EXbMPQiIu7qZZ5LN5JUOINekgpn0EtS4Qx6SSqcQS9Jhesa9BGxPCK+GxE3R8TtEfGRavyYiPhORPwwIr4QEU+rxpdV53dVl4/29y5I6rd169axfPlyJiYmWL58OevWrRt0SZqDXl5euQ94ZWZOR8TBwLci4uvAeuD8zNwRERcBZwAXVj9/mZnPj4jTgM3AX/Wpfkl9tm7dOi666CI2b97MmjVruOOOO9iwYQMAF1xwwYCrUy+6HtHnjOnq7MHVKYFXAl+sxrcDb6q2T67OU13+qoiIxiqWtKC2bdvG5s2bWb9+PcuXL2f9+vVs3ryZbdu2Dbo09ainN0xFxFOBG4DnA58CfgT8KjP3V1OmgKOq7aOAewAyc39EPAA8E/jZE/a5FlgLMDIyQqvVqnVHmjA9PT0UdQwDe9G21Huxb98+1qxZQ6vV+m0v1qxZw759+5Z0XxbV4yIzez4BhwOTwJ8CuzrGVwG3Vtu3A0d3XPYj4JkH2u/xxx+fw2BycnLQJQwNe9G21HuxbNmy3LJlS2a2e7Fly5ZctmzZAKsavGF4XADXZw/ZPaePQMjMX0VEC3g5cHhEHJQzR/VHA/dW06aq4J+KiIOApwO/qPG7SNIAvfvd7/7tmvyaNWs477zz2LBhA2eeeeaAK1OvugZ9RDwLeKQK+UOAP2PmD6yTwJuBHcDpwGXVVS6vzn+7uvw/q988khahx//ges4557Bv3z6WLVvGmWee6R9iF5FeXke/EpiMiFuA7wHXZObXgA3A+ojYxcwa/MXV/IuBZ1bj64GNzZctaSFdcMEFPPzww0xOTvLwww8b8otM1yP6zLwFeOks4z8GTphl/GHg1EaqkyTV5jtjJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9Jhesa9BGxKiImI2JnRNweEe+txj8cET+JiJuq00kd1/lAROyKiB9ExGv7eQckSQd2UA9z9gPvz8wbI+Iw4IaIuKa67PzM/Hjn5IhYA5wGvAB4DvAfEfFHmflok4VLknrT9Yg+M/dk5o3V9l5gJ3DUAa5yMrAjM/dl5p3ALuCEJoqVJM1dZGbvkyNGgW8CLwTWA+8AHgSuZ+ao/5cR8Ungusz8t+o6FwNfz8wvPmFfa4G1ACMjI8fv2LGj7n2pbXp6mhUrVgy6jKFgL9rsRZu9aBuGXkxMTNyQmWPd5vWydANARKwAvgS8LzMfjIgLgY8CWf3cArwLiFmu/nu/TTJzK7AVYGxsLMfHx3stpW9arRbDUMcwsBdt9qLNXrQtpl709KqbiDiYmZD/bGZ+GSAz78vMRzPzMWAb7eWZKWBVx9WPBu5trmRJ0lz08qqb
AC4GdmbmeR3jKzumnQLcVm1fDpwWEcsi4hhgNfDd5kqWJM1FL0s3rwDeBtwaETdVY+cAb4mI45hZltkNvAcgM2+PiEuBO5h5xc5ZvuJGkgana9Bn5reYfd39ygNc51zg3Bp1SZIa4jtjJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSpc16CPiFURMRkROyPi9oh4bzV+RERcExE/rH4+oxqPiPhEROyKiFsi4mX9vhOSpCfXyxH9fuD9mXks8HLgrIhYA2wErs3M1cC11XmA1wGrq9Na4MLGq5Yk9axr0Gfmnsy8sdreC+wEjgJOBrZX07YDb6q2TwY+kzOuAw6PiJWNVy5J6smc1ugjYhR4KfAdYCQz98DMLwPg2dW0o4B7Oq42VY1JkgbgoF4nRsQK4EvA+zLzwYh40qmzjOUs+1vLzNIOIyMjtFqtXkvpm+np6aGoYxjYi7al0ouJiYlG9jM5OdnIfobdYnpc9BT0EXEwMyH/2cz8cjV8X0SszMw91dLM/dX4FLCq4+pHA/c+cZ+ZuRXYCjA2Npbj4+PzuwcNarVaDEMdw8BetC2VXmT+3vHY7xndeAW7N71+AaoZfovpcdHLq24CuBjYmZnndVx0OXB6tX06cFnH+NurV9+8HHjg8SUeSdLC6+WI/hXA24BbI+KmauwcYBNwaUScAdwNnFpddiVwErAL+DXwzkYrliTNSdegz8xvMfu6O8CrZpmfwFk165IkNcR3xkpS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgrXNegj4pKIuD8ibusY+3BE/CQibqpOJ3Vc9oGI2BURP4iI1/arcElSb3o5ov80cOIs4+dn5nHV6UqAiFgDnAa8oLrOv0TEU5sqVpI0d12DPjO/Cfyix/2dDOzIzH2ZeSewCzihRn2SpJrqrNGfHRG3VEs7z6jGjgLu6ZgzVY1JkgbkoHle70Lgo0BWP7cA7wJilrk52w4iYi2wFmBkZIRWqzXPUpozPT09FHUMA3vRZi9+l72YsZgeF/MK+sy87/HtiNgGfK06OwWs6ph6NHDvk+xjK7AVYGxsLMfHx+dTSqNarRbDUMcwsBdt9qLDVVfYi8pielzMa+kmIlZ2nD0FePwVOZcDp0XEsog4BlgNfLdeiZKkOroe0UfE54Fx4MiImAI+BIxHxHHMLMvsBt4DkJm3R8SlwB3AfuCszHy0P6VLknrRNegz8y2zDF98gPnnAufWKUqS1BzfGStJhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekws33i0ckLTIv+cjVPPCbR2rvZ3TjFfO+7tMPOZibP/Sa2jVobgx6aYl44DePsHvT62vto+6XbdT5JaH5c+lGkgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuG6Bn1EXBIR90fEbR1jR0TENRHxw+rnM6rxiIhPRMSuiLglIl7Wz+IlSd31ckT/aeDEJ4xtBK7NzNXAtdV5gNcBq6vTWuDCZsqUJM1X16DPzG8Cv3jC8MnA9mp7O/CmjvHP5IzrgMMjYmVTxUqS5m6+a/QjmbkHoPr57Gr8KOCejnlT1ZgkaUCa/pjimGUsZ50YsZaZ5R1GRkZotVoNlzJ309PTQ1HHMLAXbSX1ou79aKIXpfRyMT0u5hv090XEyszcUy3N3F+NTwGrOuYdDdw72w4ycyuwFWBsbCzrfMZ1U+p+1nZJ7EVbMb246ora96N2LxqoYSFEzHbM
OneZsx7nLrj5Lt1cDpxebZ8OXNYx/vbq1TcvBx54fIlHGkYR0fU0MTHRdY7KkpldT8/d8LWuc4ZF1yP6iPg8MA4cGRFTwIeATcClEXEGcDdwajX9SuAkYBfwa+Cdfah5Xkr7DV2HvWjr5T6Mbryi9jczSYPUNegz8y1PctGrZpmbwFl1i+oHn9Bt9kJaWnxnrCQVzi8Hl5aIw47dyIu2b+w+sZvt3ac8eQ0A/k9xoRn00hKxd+em2stxdV91M7rxilq3r/lx6UaSCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUON8wJWnJeclHruaB3zxSez913gD29EMO5uYPvaZ2Db0w6CUtOQ/85pEl9S5hg75AS+1o5UDshVRQ0PuEbltqRysHYi+kgoLeJ7Qkzc5X3UhS4Yo5opfUXSP/67yq3vKmFp5BLy0RTXw1pF8xuTi5dCNJhTPoJalwBr0kFc6gl6TCGfSSVLhar7qJiN3AXuBRYH9mjkXEEcAXgFFgN/CXmfnLemVKkuariSP6icw8LjPHqvMbgWszczVwbXVekjQg/Vi6ORnYXm1vB97Uh9uQJPWo7humErg6IhL418zcCoxk5h6AzNwTEc+uW6QkNemwYzfyou0NLDZs7z7lyWsAWJg3n9UN+ldk5r1VmF8TEd/v9YoRsRZYCzAyMkKr1apZCrX3MT09XXsfTdyPJtiLGcPyhG61Dq1fw5AYhn/Xuvbu3MSnT6z3bzI9Pc2KFSvmff13XPXQgvWyVtBn5r3Vz/sj4ivACcB9EbGyOppfCdz/JNfdCmwFGBsbyzqfGgnAVVfU+uRJqP/plU3U0ITD7noR6+5qYEc/r1HDsTA+fmsDRdSzd+OmofhU0/HT53/9oTIkj/HallhezDvoI+JQ4CmZubfafg3wT8DlwOnApurnZU0U2s2wHLkt1H/FDmTvzuEIN0nDoc4R/QjwlYh4fD+fy8yrIuJ7wKURcQZwN3Bq/TK7M9wkaXbzDvrM/DHwklnGfw68qk5RkqTm+M5YSSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYWr+zHFkrQoNfLZVFfNfx9PP+Tg+rffI4NexVtKT2j1pu4HIMLM46qJ/SyEooLeJ3SbvZix1J7Q0myKCXqf0G32QlIn/xgrSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXB9C/qIODEifhARuyJiY79uR5J0YH35rJuIeCrwKeDVwBTwvYi4PDPv6MftSVKTIqK3eZsPfHlmNlBNff06oj8B2JWZP87M/wN2ACf36bYkNSAiup7u2vyGrnNKkJldT5OTk13nDIt+fXrlUcA9HeengD/u021pjko7WqnDXrT1ch9arRbj4+P9L0aNin48QCPiVOC1mfk31fm3ASdk5rqOOWuBtQAjIyPH79ixo/E6Ok1MTDSyn8nJyUb2M+ymp6dZsWLFoMsYCvaizV60DUMvJiYmbsjMsW7z+nVEPwWs6jh/NHBv54TM3ApsBRgbG8t+HyV4tDI39qLNXrTZi7bF1It+rdF/D1gdEcdExNOA04DL+3RbkqQD6MsRfWbuj4izgW8ATwUuyczb+3FbkqQD69tXCWbmlcCV/dq/JKk3vjNWkgpn0EtS4Qx6SSqcQS9JhTPoJalwfXln7JyLiPgpcNeg6wCOBH426CKGhL1osxdt9qJtGHrx3Mx8VrdJQxH0wyIiru/l7cRLgb1osxdt9qJtMfXCpRtJKpxBL0mFM+h/19ZBFzBE7EWbvWizF22Lpheu0UtS4Tyil6TCGfRzEBGnRsTtEfFYRCyKv7b3S0R8LCK+HxG3RMRXIuLwQdc0KBHx0aoPN0XE1RHxnEHXNGgR8fcRkRFx5KBrGZSI+HBE/KR6XNwUEScNqhaDfm5uA/4c+OagCxkC1wAvzMwXA/8DfGDA9QzSxzLzxZl5HPA14B8H
XdAgRcQq4NXA3YOuZQicn5nHVaeBfZpvkUEfEaMRsTMitlVH4FdHxCER0Xr8SDwijoyI3dX2OyLi3yPiqxFxZ0ScHRHrI+K/I+K6iDgCIDN3ZuYPBnjX5qyPvbg6M/dXN3MdM98iNtT62IsHO27mUGDo//DVr15Uzgf+gUXQB+h7L4ZCkUFfWQ18KjNfAPwK+Isu818I/DVwAnAu8OvMfCnwbeDt/Sx0AfS7F+8Cvt5cuX3Vl15ExLkRcQ/wVhbPEX3jvYiINwI/ycyb+1Z1f/TrOXJ2tax3SUQ8ow9196TkoL8zM2+qtm8ARrvMn8zMvZn5U+AB4KvV+K09XHfY9a0XEfFBYD/w2caq7a++9CIzP5iZq5jpw9mNVtw/jfYiIv4A+CCL5xddp348Li4EngccB+wBtjRZ8FyUHPT7OrYfZebbtPbTvs/LDzD/sY7zj9HHb+JaIH3pRUScDrwBeGsuntfp9vtx8Tm6Hw0Oi6Z78TzgGODmapnjaODGiPjDZsvui8YfF5l5X2Y+mpmPAduYOfofiJKDfja7geOr7TcPsI5hsJsavYiIE4ENwBsz89cN1jUIu6nXi9UdZ98IfL+BmgZlN/PsRWbempnPzszRzBwFpoCXZeb/NlvigtlNvcfFyo6zpzDzYo6BWGpB/3HgbyPiv5j55Lk5iYhTImIK+BPgioj4RtMFLqBavQA+CRwGXFO9dOyiRqtbWHV7sSkibouIW4DXAO9ttLqFVbcXJanbi3+OiFurx8UE8HeNVjcHvjNWkgq31I7oJWnJMeglqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSrc/wM/Yrp+f5QpWwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Inject three artificial outliers into rows 7-9 of column num4 (.loc slices include both end labels).\n",
    "df1.loc[7:9, \"num4\"] = [100, 200, 300]\n",
    "\n",
    "# Optional alternative: a scatter matrix gives a rough visual check for outliers.\n",
    "# pd.plotting.scatter_matrix(df1, marker='o', alpha=0.2, figsize=(6, 6), diagonal='kde')\n",
    "\n",
    "# A box plot is a common way to spot outliers.\n",
    "df1.boxplot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 五、基于python的数据预处理\n",
    "现实世界中数据大体上都是不完整，不一致的脏数据，无法直接进行数据分析，或挖掘结果差强人意。为了提高数据挖掘的质量产生了数据预处理技术。\n",
    "数据预处理有多种方法：数据清理，数据集成，数据变换，数据归约等。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.1 标准化（Standardization）\n",
    "特征$X$标准化的公式为：\n",
    "$$\n",
    "X_i=\\frac{X_i-mean(X)}{std(X)},\\quad X=(X_1,X_2,...,X_d)\n",
    "$$\n",
    "一般会把train和test集放在一起做标准化，或者先在train集上拟合标准化器（scaler），再用同一个scaler去变换test集，如下面的代码所示。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 20 entries, 0 to 19\n",
      "Data columns (total 5 columns):\n",
      "num1    20 non-null float64\n",
      "num2    20 non-null float64\n",
      "num3    20 non-null float64\n",
      "num4    20 non-null float64\n",
      "num5    20 non-null float64\n",
      "dtypes: float64(5)\n",
      "memory usage: 928.0 bytes\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>num1</th>\n",
       "      <th>num2</th>\n",
       "      <th>num3</th>\n",
       "      <th>num4</th>\n",
       "      <th>num5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>-1.647509</td>\n",
       "      <td>-1.647509</td>\n",
       "      <td>-1.647509</td>\n",
       "      <td>-1.380302</td>\n",
       "      <td>-1.647509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>-1.474087</td>\n",
       "      <td>-1.474087</td>\n",
       "      <td>-1.474087</td>\n",
       "      <td>-0.174635</td>\n",
       "      <td>-1.474087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>-1.300665</td>\n",
       "      <td>-1.300665</td>\n",
       "      <td>-1.300665</td>\n",
       "      <td>1.068320</td>\n",
       "      <td>-1.300665</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>-1.127243</td>\n",
       "      <td>-1.127243</td>\n",
       "      <td>-1.127243</td>\n",
       "      <td>2.311276</td>\n",
       "      <td>-1.127243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>-0.953821</td>\n",
       "      <td>-0.953821</td>\n",
       "      <td>-0.953821</td>\n",
       "      <td>-1.131711</td>\n",
       "      <td>-0.953821</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       num1      num2      num3      num4      num5\n",
       "0 -1.647509 -1.647509 -1.647509 -1.380302 -1.647509\n",
       "1 -1.474087 -1.474087 -1.474087 -0.174635 -1.474087\n",
       "2 -1.300665 -1.300665 -1.300665  1.068320 -1.300665\n",
       "3 -1.127243 -1.127243 -1.127243  2.311276 -1.127243\n",
       "4 -0.953821 -0.953821 -0.953821 -1.131711 -0.953821"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sklearn\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Learn each column's mean and standard deviation from df1 (the training data in this demo).\n",
    "scaler = StandardScaler().fit(df1)\n",
    "\n",
    "# transform() returns a NumPy array, so wrap it back into a DataFrame. Passing\n",
    "# index=df1.index keeps the original row labels instead of resetting them to 0..n-1.\n",
    "df1_1 = pd.DataFrame(scaler.transform(df1), index=df1.index, columns=df1.columns)\n",
    "\n",
    "# For a held-out set, reuse the already-fitted scaler rather than fitting a new one:\n",
    "# df1_2 = scaler.transform(testdata)\n",
    "\n",
    "df1_1.info()\n",
    "df1_1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.2  最小-最大规范化（MinMaxScaler）\n",
    "最小-最大规范化对原始数据进行线性变换，变换到[0,1]区间（也可以是其他固定最小最大值的区间）\n",
    "$$\n",
    "X_i=\\frac{X_i-X_{min}}{X_{max}-X_{min}}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "\n",
    "# Fit on the training data (df1 here; X_train in a real workflow). fit() returns the\n",
    "# estimator itself, so construction and fitting can be chained.\n",
    "scaler = MinMaxScaler().fit(df1)\n",
    "\n",
    "# Note: transform() returns a NumPy array, not a DataFrame.\n",
    "traindata = scaler.transform(df1)\n",
    "\n",
    "# Apply the same fitted scaler to the test set:\n",
    "# X_test_pp = scaler.transform(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.3 规范化(Normalization)\n",
    "规范化是将不同变化范围的值映射到相同的固定范围，常见的是[0,1]，此时也称为归一化。\n",
    "$$\n",
    "X_i=\\frac{X_i}{\\|X\\|_2}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import preprocessing\n",
    "\n",
    "# Scale every column (axis=0) of df1 to unit L2 norm with the functional API.\n",
    "X_train_norm = preprocessing.normalize(df1, norm='l2', axis=0)\n",
    "\n",
    "# Estimator-style alternative; note that Normalizer works per sample (row), not per column:\n",
    "# normalizer = preprocessing.Normalizer(norm=\"l2\").fit(df1)\n",
    "# df_train_norm2 = normalizer.transform(df1)\n",
    "# df_test_norm2 = normalizer.transform(testdata)\n",
    "\n",
    "# Open question left for the reader: how should the test set be normalized?\n",
    "\n",
    "# Sanity check: the squared entries of a normalized column sum to 1.\n",
    "(X_train_norm[:, 2] ** 2).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.3 类别特征编码\n",
    "有时候特征是类别型的，而一些算法的输入必须是数值型，此时需要对其编码。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-56-dd89c116f523>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnewdf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_dummies\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"state\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdummy_na\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mnewdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
     ]
    }
   ],
   "source": [
    "newdf=pd.get_dummies(df,columns=[\"state\"],dummy_na=True)\n",
    "newdf.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.4 连续变量离散化\n",
    "数据分析和统计的预处理阶段，经常会碰到年龄、消费等连续型数值，我们希望将数值进行离散化分段统计，提高数据区分度。\n",
    "在机器学习中，决策树算法常用这种方式，这里介绍pd.cut()函数的用法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[30 88 98 49 51 42 86 44 84 39 67 50 59 90 83 42 39 68 80 48]\n"
     ]
    }
   ],
   "source": [
    "# Draw 20 random integer scores in [25, 100). A locally seeded generator makes the\n",
    "# scores (and every result derived from them below) reproducible across runs without\n",
    "# touching NumPy's global random state.\n",
    "score_list = np.random.RandomState(42).randint(25, 100, size=20)\n",
    "print(score_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0, 59], (80, 100], (80, 100], (0, 59], (0, 59], ..., (0, 59], (0, 59], (59, 70], (70, 80], (0, 59]]\n",
      "Length: 20\n",
      "Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]\n"
     ]
    }
   ],
   "source": [
    "# Right-closed bin edges: (0, 59], (59, 70], (70, 80], (80, 100].\n",
    "bins = [0, 59, 70, 80, 100]\n",
    "\n",
    "# pd.cut assigns every score to its interval; the result is categorical data.\n",
    "score_cut = pd.cut(score_list, bins=bins)\n",
    "print(score_cut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0, 59]      11\n",
      "(80, 100]     6\n",
      "(59, 70]      2\n",
      "(70, 80]      1\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Count the scores per interval. The top-level pd.value_counts() is deprecated in\n",
    "# recent pandas releases, so use the equivalent Series.value_counts() method instead.\n",
    "print(pd.Series(score_cut).value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>Categories</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>30</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>88</td>\n",
       "      <td>perfect</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>98</td>\n",
       "      <td>perfect</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>49</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>51</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>42</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>86</td>\n",
       "      <td>perfect</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>44</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>84</td>\n",
       "      <td>perfect</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>39</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>67</td>\n",
       "      <td>middle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>50</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>59</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>90</td>\n",
       "      <td>perfect</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>83</td>\n",
       "      <td>perfect</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>42</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>39</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>68</td>\n",
       "      <td>middle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>80</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>48</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    score Categories\n",
       "0      30        low\n",
       "1      88    perfect\n",
       "2      98    perfect\n",
       "3      49        low\n",
       "4      51        low\n",
       "5      42        low\n",
       "6      86    perfect\n",
       "7      44        low\n",
       "8      84    perfect\n",
       "9      39        low\n",
       "10     67     middle\n",
       "11     50        low\n",
       "12     59        low\n",
       "13     90    perfect\n",
       "14     83    perfect\n",
       "15     42        low\n",
       "16     39        low\n",
       "17     68     middle\n",
       "18     80       good\n",
       "19     48        low"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Put the scores into a DataFrame so the discretized labels can sit next to them.\n",
    "df_dic = {\"score\": score_list}\n",
    "df = pd.DataFrame(df_dic)\n",
    "\n",
    "# Bin the `score` column of the frame built just above and name the four intervals.\n",
    "# Reading from `df` itself keeps the cell runnable on a fresh kernel.\n",
    "df['Categories'] = pd.cut(df['score'], bins, labels=['low', 'middle', 'good', 'perfect'])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 3 3 1 1 0 3 1 3 0 2 1 2 3 2 0 0 2 2 1]\n"
     ]
    }
   ],
   "source": [
    "# Equal-frequency binning: split the scores at their quartiles (try q=10 for deciles).\n",
    "scorequan = pd.qcut(score_list, q=4)\n",
    "\n",
    "# .codes is an ndarray holding, for each score, the position (0-3) of its quartile bin.\n",
    "print(scorequan.codes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 六、数据清洗\n",
    "数据合并、转换、过滤、排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber1\n",
      "0     a         1\n",
      "1     b         3\n",
      "2     c         5\n",
      "3     d         7\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Two small tables sharing the key column `level`; key 'd' exists only in data1\n",
    "# and key 'e' only in data2.\n",
    "data1 = pd.DataFrame({'level': list('abcd'), 'numeber1': [1, 3, 5, 7]})\n",
    "data2 = pd.DataFrame({'level': list('abce'), 'numeber2': [2, 3, 6, 10]})\n",
    "\n",
    "print(data1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber\n",
      "0     a        2\n",
      "1     b        3\n",
      "2     c        6\n",
      "3     e       10\n"
     ]
    }
   ],
   "source": [
    "# Show the second table; compare its keys with data1 before merging.\n",
    "print(data2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber1  numeber2\n",
      "0     a         1         2\n",
      "1     b         3         3\n",
      "2     c         5         6\n"
     ]
    }
   ],
   "source": [
    "# With no extra arguments, merge performs an inner join on the shared column `level`,\n",
    "# so only keys present in both tables (a, b, c) are kept; 'd' and 'e' are dropped.\n",
    "print(data1.merge(data2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level1  numeber1 level2  numeber2\n",
      "0      a         1      a         2\n",
      "1      b         3      b         3\n",
      "2      c         5      c         6\n"
     ]
    }
   ],
   "source": [
    "# The key columns have different names here, so name them explicitly with\n",
    "# left_on / right_on; both key columns are kept in the result.\n",
    "data3 = pd.DataFrame({'level1': list('abcd'), 'numeber1': [1, 3, 5, 7]})\n",
    "data4 = pd.DataFrame({'level2': list('abce'), 'numeber2': [2, 3, 6, 10]})\n",
    "\n",
    "print(data3.merge(data4, left_on='level1', right_on='level2'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "两个数据框的列名不同时，我们可以通过指定left_on和right_on两个参数把数据连接在一起"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 重叠数据合并\n",
    "有时候我们会遇到重叠数据需要进行合并处理，此时可以用combine_first函数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  level  numeber1  numeber2\n",
      "0     a       1.0       2.0\n",
      "1     b       3.0       NaN\n",
      "2     c       5.0       6.0\n",
      "3     d       NaN      10.0\n"
     ]
    }
   ],
   "source": [
    "# Two tables with missing values (np.nan).\n",
    "data3 = pd.DataFrame({'level': list('abcd'), 'numeber1': [1, 3, 5, np.nan]})\n",
    "data4 = pd.DataFrame({'level': list('abce'), 'numeber2': [2, np.nan, 6, 10]})\n",
    "\n",
    "# combine_first aligns the two tables on the row index: data3's values take priority\n",
    "# and the result contains the columns of both tables.\n",
    "print(data3.combine_first(data4))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到相同标签下的内容优先显示data3的内容"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据替换\n",
    "除了使用我们上面提到的fillna的方法外，还可以用replace方法，而且更简单快捷。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    a   b\n",
      "0  20  20\n",
      "1   3   3\n",
      "2   3   3\n",
      "3   4   5\n"
     ]
    }
   ],
   "source": [
    "data = pd.DataFrame({'a': [1, 3, 3, 4], 'b': [1, 3, 3, 5]})\n",
    "\n",
    "# Replace every occurrence of the value 1 with 20, across all columns.\n",
    "# replace() returns a new frame, so `data` itself is left unchanged.\n",
    "print(data.replace(to_replace=1, value=20))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 七、财经数据模块tushare"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 365 entries, 183 to 547\n",
      "Data columns (total 7 columns):\n",
      "date      365 non-null object\n",
      "open      365 non-null float64\n",
      "close     365 non-null float64\n",
      "high      365 non-null float64\n",
      "low       365 non-null float64\n",
      "volume    365 non-null float64\n",
      "code      365 non-null object\n",
      "dtypes: float64(5), object(2)\n",
      "memory usage: 22.8+ KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>open</th>\n",
       "      <th>close</th>\n",
       "      <th>high</th>\n",
       "      <th>low</th>\n",
       "      <th>volume</th>\n",
       "      <th>code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>2020-03-30</td>\n",
       "      <td>17.01</td>\n",
       "      <td>17.20</td>\n",
       "      <td>17.20</td>\n",
       "      <td>16.92</td>\n",
       "      <td>485501.0</td>\n",
       "      <td>601688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544</th>\n",
       "      <td>2020-03-31</td>\n",
       "      <td>17.51</td>\n",
       "      <td>17.23</td>\n",
       "      <td>17.58</td>\n",
       "      <td>17.23</td>\n",
       "      <td>515348.0</td>\n",
       "      <td>601688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>545</th>\n",
       "      <td>2020-04-01</td>\n",
       "      <td>17.22</td>\n",
       "      <td>17.32</td>\n",
       "      <td>17.59</td>\n",
       "      <td>17.20</td>\n",
       "      <td>497352.0</td>\n",
       "      <td>601688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546</th>\n",
       "      <td>2020-04-02</td>\n",
       "      <td>17.24</td>\n",
       "      <td>17.67</td>\n",
       "      <td>17.67</td>\n",
       "      <td>17.24</td>\n",
       "      <td>563641.0</td>\n",
       "      <td>601688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>547</th>\n",
       "      <td>2020-04-03</td>\n",
       "      <td>17.66</td>\n",
       "      <td>17.47</td>\n",
       "      <td>17.66</td>\n",
       "      <td>17.39</td>\n",
       "      <td>374464.0</td>\n",
       "      <td>601688</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           date   open  close   high    low    volume    code\n",
       "543  2020-03-30  17.01  17.20  17.20  16.92  485501.0  601688\n",
       "544  2020-03-31  17.51  17.23  17.58  17.23  515348.0  601688\n",
       "545  2020-04-01  17.22  17.32  17.59  17.20  497352.0  601688\n",
       "546  2020-04-02  17.24  17.67  17.67  17.24  563641.0  601688\n",
       "547  2020-04-03  17.66  17.47  17.66  17.39  374464.0  601688"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tushare as ts\n",
    "\n",
    "# Download daily price/volume history for stock code 601688 between the two dates.\n",
    "# autype='qfq' presumably requests forward-adjusted prices -- TODO confirm against the tushare docs.\n",
    "ht = ts.get_k_data(\n",
    "    code=\"601688\",\n",
    "    start='2018-1-1',\n",
    "    end='2020-4-7',\n",
    "    autype='qfq',\n",
    ")\n",
    "ht.info()\n",
    "ht.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzsnXd4HNW5/z9n+0raVZfcLRs3sDFumB4DpgcIkEJoMSEBQiD0XHIvvxBIv4RLuJCQBELNJaRQAgRDgNCLDTbYuGOMZVuW1aXV9p1yfn/MaCVZktWl9XI+z+PHu3NmZs9qd7/zznveIqSUKBQKhWL/xzHaE1AoFArF0KAEXaFQKLIEJegKhUKRJShBVygUiixBCbpCoVBkCUrQFQqFIktQgq5QKBRZghJ0hUKhyBKUoCsUCkWW4BrJFyspKZEVFRUj+ZIKhUKx37N69eoGKWVpb/uNqKBXVFSwatWqkXxJhUKh2O8RQuzoy37K5aJQKBRZghJ0hUKhyBKUoCsUCkWWMKI+dIVCoegrmqZRVVVFIpEY7amMGD6fjwkTJuB2uwd0vBJ0hUKRkVRVVREIBKioqEAIMdrTGXaklDQ2NlJVVcWUKVMGdA7lclEoFBlJIpGguLj4cyHmAEIIiouLB3VHogRdoVBkLJ8XMW9jsO+3V0EXQkwUQrwmhNgkhNgghLimw9j3hBBb7O23D2omimHhjV1vUBOtGe1pKBSKEaAvFroO3CClPBA4HLhSCHGQEOI44EvAXCnlbOCOYZynYgBIKbn29Wv5+yd/H+2pKBSKEaDXRVEp5R5gj/04LITYBIwHLgV+KaVM2mN1wzlRRf9JmSl0UyemxUZ7KgqFYgTolw9dCFEBzAdWAjOAY4QQK4UQbwghDu3hmMuEEKuEEKvq6+sHO19FP0jo1uJK0kiO8kwUiv2XO++8kzlz5jBnzhzuuusuKisrmTVrFsuWLWPu3Ll85StfIRazjKbVq1ezZMkSFi5cyMknn8yePXsAOPbYY7nppptYvHgxM2bM4K233hqWufY5bFEIkQc8CVwrpWwVQriAQiw3zKHA34QQU6WUsuNxUsr7gPsAFi1aJFGMGG1CrgRdsb9z23Mb2FjdOqTnPGhckB+dMXuf+6xevZqHHnqIlStXIqXksMMOY8mSJWzZsoUHHniAo446iksuuYR7772Xa665hu9973s888wzlJaW8te//pWbb76ZBx98EABd13n//fdZvnw5t912G6+88sqQvh/oo6ALIdxYYv6YlPIpe3MV8JQt4O8LIUygBFBmeIaQ1C0hb7PUFQpF/3j77bc5++yzyc3NBeCcc87hrbfeYuLEiRx11FEAXHjhhdx9992ccsoprF+/nhNPPBEAwzAYO3Zs+lznnHMOAAsXLqSysnJY5turoAsrjuYBYJOU8s4OQ/8AjgdeF0LMADxAw7DMUjEgEoYl5CkjNcozUSgGR2+W9HCxl8Mhzd7hhUIIpJTMnj2b9957r9tjvF4vAE6nE13Xh3aiNn3xoR8FXAQcL4RYY/87DXgQmCqEWA/8BVi2t7tFMbq0uVrahF2hUPSPL3zhC/zjH/8gFosRjUZ5+umnOeaYY9i5c2dauB9//HGOPvpoZs6cSX19fXq7pmls2LBhROfblyiXt4Geot0vHNrpKIYStSiqUAyOBQsWcPHFF7N48WIAvv3tb1NYWMiBBx7II488wuWXX8706dO54oor8Hg8PPHEE1x99dWEQiF0Xefaa69l9uyRu7tQtVyymLSFrnzoCsWAuf7667n++uvTzysrK3E4HPz+97/vsu+8efN48803u2x//fXX049LSkqGzYeuUv+zmDZXi7LQFYrPB0rQsxjlclEohp6KigrWr18/2tPoFiXoWYyKQ1coPl8oQc9i0ha63lnQL1x+IT9690eY0hyNaSkUimFCCXoW013YopSStfVreWrrU7y+6/VR
mplCoRgOVJRLFtMm5JqpYZgGv1nzG1bVrEqPN8RVHphCkU0oQc9iOrpakkaSP677Y+dx5VtXKPrNrbfeSl5eHjfeeONoT6ULyuWSxXSMP+8u/V8JukKRXShBz2JiWrugd5f+rwRdoeidRx99lLlz53LIIYdw0UUXdRpbs2YNhx9+OHPnzuXss8+mubkZgLvvvpuDDjqIuXPn8vWvfx2AaDTKJZdcwqGHHsr8+fN55plnhnyuyuWSxUQ7CHp34q0EXbHf8MIPoGbd0J5zzMFw6i/3ucuGDRv42c9+xjvvvENJSQlNTU3cfffd6fFvfOMb3HPPPSxZsoRbbrmF2267jbvuuotf/vKXbN++Ha/XS0tLCwA/+9nPOP7443nwwQdpaWlh8eLFnHDCCelKjkOBstCzmJgWTz/uLv1/73BGhULRmVdffZWvfOUrlJSUAFBUVJQeC4VCtLS0sGTJEgCWLVuWTvufO3cuF1xwAf/3f/+Hy2XZzS+99BK//OUvmTdvHsceeyyJRIKdO3cO6XyVhZ7FxHVloSuyhF4s6eFCStmlVG5feP7553nzzTd59tln+clPfsKGDRuQUvLkk08yc+bMYZiphbLQsxgl6ArF4Fi6dCl/+9vfaGxsBKCpqSk9lp+fT2FhYbqd3J/+9CeWLFmCaZrs2rWL4447jttvv52WlhYikQgnn3wy99xzT7rG+kcffTTk81UWehaT1BNIw4twJrttFK0EXaHYN7Nnz+bmm29myZIlOJ1O5s+fT0VFRXr8kUce4Tvf+Q6xWIypU6fy0EMPYRgGF154IaFQCCkl1113HQUFBfzwhz/k2muvZe7cuUgpqaio4J///OeQzlcJehbjaNlMoQktTmhKNHUaEwgl6ApFH1i2bBnLli3rdmzevHmsWLGiy/a33367yza/388f/vCHIZ9fR5TLJYsxjCRjTMvt0pho7DQW8ATUoqhCkWUoQc9iNAGFpgFAXayu01jQnata0ykUWYZyuWQxKSEoNKyKirXR2k5jgaYdpGT/V+8VCkXmoiz0LEYTgjzTBNNJTayzoAdNk4Qe7+FIhUKxP6IEPYvRELglOExvFws9aJrEDX2UZqZQKIYDJehZjC7ALSVuw0VjonOp3KBpEta6FuxSKBT7L70KuhBiohDiNSHEJiHEBiHENXuN3yiEkEKIkuGbpmIgaELgRuI1O3/Mbinxmya6MEZpZgrF/kFlZSVz5szpsv2WW27hlVde2eext956K3fcccdwTa1b+rIoqgM3SCk/FEIEgNVCiJellBuFEBOBE4GhLUigGDSmoaELgVtKck1o7TDmlRKPlOgoQVcoBsKPf/zj0Z5Ct/RqoUsp90gpP7Qfh4FNwHh7+NfAfwBy2GaoGBB6KgqAW0K+2dlX7pESn5QYQmKYStQVin1hGAaXXnops2fP5qSTTiIej3PxxRfzxBNPALB8+XJmzZrF0UcfzdVXX83pp5+ePnbjxo0ce+yxTJ06tVOVxuGiX2GLQogKYD6wUghxJrBbSrl2IMVrFMNLSmsTdEmZmWAzMCkwhZ3h7WkLHSBlpvA7/P0+vylNnt76NEkjyfkHnj+UU1couvDf7/83m5s2D+k5ZxXN4qbFN/W639atW3n88ce5//77+drXvsaTTz6ZHkskElx++eW8+eabTJkyhfPOO6/TsZs3b+a1114jHA4zc+ZMrrjiCtxu95C+j470eVFUCJEHPAlci+WGuRm4pQ/HXSaEWCWEWFVfXz/giSr6h2bXbnFLyRhpifvOGj8O04HXtCx06LmErmEamNLs8fwvbH+BW9+7lV+8/4t0sSGFIhuZMmUK8+bNA2DhwoVUVlamxzZv3szUqVOZMmUKQBdB/+IXv4jX66WkpISysjJqaztHmw01fbLQhRBuLDF/TEr5lBDiYGAK0GadTwA+FEIsllLWdDxWSnkfcB/AokWL1C9/hGgT9DBBys0I4CWVCpLrc+CREret1T1li37/ze/jc/r4+TE/
73Z8S/OW9OO4HifHnTOk81coOtIXS3q48Hq96cdOp5N4vD1/ozdjZu9jdX14Q4V7FXRhKfYDwCYp5Z0AUsp1QFmHfSqBRVJK1UY+Q9B0S9CjrjLctoU+yQgTkyYeKTGkB+i+1yjA2vq1BD3BHs+/O7w7/bgl2aIEXfG5ZNasWXz22WdUVlZSUVHBX//611GdT19cLkcBFwHHCyHW2P9OG+Z5KQaJZncrirjKCDmtj3mZeJugTOKSDpzSCfTca7QuVtclGakj1ZHq9OPmRPNQTl2h2G/w+/3ce++9nHLKKRx99NGUl5eTn58/avPp1UKXUr4N7HPVU0pZMVQTUgwNbYKechZySUuYd/1+TolGeSKQi0+aOO3Y9O586LsjlvUd1sLEtFi31vfuyG5IlIKvnvpYU5dxhSIbqKioYP369ennN954Y5d9jjvuODZv3oyUkiuvvJJFixYBVhx6RzqeZ7hQmaJZimbXadFd+UzVdJZXVVNimMxLppibTOI0e7bQq8JV6ce1sa5WekyL0Zxs5kvadgBqIpkt6FuatnD9S7fz721rR3sqiizk/vvvZ968ecyePZtQKMTll18+anNR1RazFK2t/ZzDh/TlIxIhKs1ybm2wBPo5dwCwFjT3Zld4V/pxbayWKflTOo23WfCzkymeCUB9rHOt9UzjF+//gtW1q3l+4xa2XPvAaE9HkWVcd911XHfddaM9DUBZ6FlLyhZq4fAi/IUA7JZWdYakdOGyfeh7C/oHNR/wy/fbG/J250dvE/QDUymQ0BDLbB96OBUGQDi7tuFTZDaft5DYwb5fJehZSpuF7nR6wRb0FnJ5cem/OCb5v+0ulw6NpHVT58fv/RiXcHHNAqtkT3cul7YIl4majsvw0JThi6Ixez1BCfr+hc/no7Gx8XMj6lJKGhsb8fl8Az6HcrlkKbrdL9QhfOAvAqBFBlg6dx5vHHYoW+7+HdDayUL/YM1DVLZWMqn2BC6c9U0eWv9Ql05HALubNuM3TYpME5/poiXZMiLvaaDE7KxZJej7FxMmTKCqqorPU0Kiz+djwoQJAz5eCXqW0m6h+8DdbqEX5XrwuBx4HFYcekcLvWq31ez2C7FqWt59iAAOIlqk03lNabK75TPG6ToCyDUETYnM/sEl4o3gEAinauixP+F2u9MZmIq+oVwuWYpmJww5nT7IsSz0b524AI/L+si9DquexP+s/h8OfuRgAGqEiUNKphhRxrx2A4FwHZFUZ0Ff+velvNr4MeN0q6jXATEvO2Mb2dC4YUTeV3+RUhK3g247Wujv7n6Xd3a/M0qzUiiGByXoWYpmu1yE05f2ofuCpelxp9ObTv8HS/hqUiHKDIPTHB8AkGea6QXFtn0a4lYycKFhEJNeTm5x4RZ+nvjkieF+SwMiaSSRdis+4dBI2n+X3679Lfd8dM8oz06hGFqUoGcpbYLucvnTgp7+H8Dpxtthsak11UpNsoUxukGesNwweabJ7qb2RdGo7Yu2xiQ7xRjGEyHoGktNtFMJn4yhbY1gjF1DoyFqLeA2xZtojGd2uKVC0V+UoGcpbTVanK6c9KJo+n9AuDx4OwQPNMQbqNEjaeHbZE4iYEpiWii9T9vi53H+CVzZ0sIex1hKRZhoLIfqcNfF00wgZte0GWO7iKrDlojvbm2gJlr/uYmgUHw+UIKepSTTPnQ/TDoMKo6BsgPT48LpwW+2i1ltrJZaM5EWvu9rl5Fnmmi0L5qGUpa4n+0Zg0f6CDmLKRVNhCN+Pmtur+2SScQT1pzbLlQ1kSZSRgrpSIAwaE217utwhWK/Qgl6lpKya7Q43TlQWAEX/xP8Belxh8udrokO8EnTJySRCKOAeYk/sF5OxSNdJNHTVmzIFsd8XSPmyKXSN5MAcQp1wBlBM7URe399JR63yhKMtS9UddFmmhLtpQrqY6pAqCJ7UIKepWiGJa5eV/dlbZ1ubycf+sqalQDkpfxMr5jEXefOw4cbKdrdFm0uF2ckSpgc
KnMPAeAA01o4zUSfdNwW77GGZaHXx5o6VYfc1piZvn+FYiAoQc9SNFNDSInL4ex23OXyIDu0gn23+l0A8hJ5HHlACWfNH48PK7SxLdKlTdB9Oz+kXvMR84+nXpQwz7AWTrtLQhpt4nFrzhM1HSFhW2gLv1v7+/T4p02Z6SpSKAaCEvQsJWVouCW4Xd1/xC6PF6NDUWRTmuSbEqnnEfRbQu4XVreVtlj0kC3oU80WwtJPrs9N0YIvcTKfALByz/v811v/lVGul5htje/Rx1Ji6Kxo+Cev7Xo1Pb4zlHkXIYVioChBz1I0qeMCXI7uP2K3x4tuC3q+YVnquaZJmDzy2wTdbh7dkrQWDkOtVeSZJm4gTA65XifOo6+m3HZn3P3R//LcZ8/xUc2m4Xtj/SRuL3o+rp2STobqyJ7w8PZ4VChGEiXoWYpmargkuBzd9ybxeLy0yduNjZYVuyCRoFXmEPRZFSFynJb/vT5iLYa2hHeTb1jZSBHpI9frgsIKfL4JeMz219nRlDnFutoEPeqdTPlegj5G12mJ7hyNaSkUw4IS9CxFMw1cEpw9CLrL7UWzGnwzU0sydtt5/L+GJlrJSbtc8tx5ANTHLEEPxerJNy1RDIo4eR67FFBOKeNS7a8T1jKnWFc8ZSVDSWchhXp76aIc02SyphNOVfV0qEKx36EEPUtJmTpOCS5n94LucLnR7KE802SB2UKulDTKIJOKLMs84LGaYNTb2ZXhZIiAHbteLposCx1w5JVQobV3M28rD5AJxLQoDinBk0/QtMqSfi1wPCt2VDFR04iIzO62pFD0ByXoWYouDVxS9OhDx+mhyHaf5JiSmVQCEHMXMTbfEr5iXxFOKdljp/XH9DjCtKo0PmMclS705Q6WMyPVnoCUSfXR41oUv5Q4vAGC0rrjGJM0EMBE3SDh1KiLZM58FYrBoAQ9S4lLfZ8uF5xu7qmt59b6RopNk9mOSgACJeMRtivGlxNkjG5QE7UaWsQw0A0fByT+xP8ZJ5DQLPeLJ7+MOXp7nZfmROZYvXE9js+UeL0epslyrmtq5rwtj5OULsocQQDer/p0lGepUAwNStCzlAQGHunocVEUh5sxhsGXI5YQzxGVAJSMmZjexZsbZLyu0xC3YrVj0sQpneBwASIt6CK3lMPiCZyNizD1AKGUZfHWRGu4cPmF7Gpt71E60kSMBLkSvC4n+Mq4JBQmD53f6mcx0WNlzq6r3TZq81MohpJeBV0IMVEI8ZoQYpMQYoMQ4hp7+6+EEJuFEB8LIZ4WQhT0di7FyJGQJm5T9OhDx+lJPzRwkCOSJKSbKePK09vz8vIZp+s0Ja1Y7biQOE033zt+GmccMo4LD59s7ZhbQo6UlNcvxkwVE7Zrvtzx/j2srV/Lo+v+MTxvsg9EjBR+U+BxOpA5Vk/VT8zx3G2cw9ScUhwSNjdtGbX5KRRDSV8sdB24QUp5IHA4cKUQ4iDgZWCOlHIu8Anwn8M3TUV/iWPgNh378KG70w/3eCoAaCCfGWOC6e3Cm8c4XSdOiLgeJyEEwnQxNt/HPefNpyDHvijkWnXWb3T9HYeeQ6tu9YFcWb0KgDVVoxfrHZEafhM8LgfOXLtzk28CK/5zKYG8EqYlTXbGMrM5h0LRX3oVdCnlHinlh/bjMLAJGC+lfElK2RbasAIYeCM8xZCTQOKWjp596ML66D8zx9BYNB+AJhlgRnle+z6e3HQyztYm2y1hevC59yonYAv6ic7VTDNbaNGqmfvoXFp0y1UTl6Mn6FGp4zMdeF0O2m5WmoOzGJNvNf6Yn0zRpH+arn2jUOzP9MuHLoSoAOYDK/caugR4YWimpBgK4kLiMp24e3K52On8TxtHY0w8EoByR4jiPG/7Pp68dNnZD6o2W9tMDzmevVrR5pWlH/4qspozwpZfPs9Rjh6ZQcQcPUEPSwOvKfC4HDROO4fbta8RWniVNegvZHEigonGpqbMyW5VKAZKnwVdCJEHPAlcK6Vs7bD9
Ziy3zGM9HHeZEGKVEGLV56l792iTQOKUzp4t9IO/yh2uy/idcSYFs74AQDF71Qb35JFvWqGNlc12RqXpJcezl4XuLyRxwbMAzNA0ft7QyJTtZ/Dd1JH4UgFa9T2j1kgiioHXdOB1OTl1XgVHf/MXfPXw6el5z01aZYY3Nm4clfkpFENJnwRdCOHGEvPHpJRPddi+DDgduED28IuVUt4npVwkpVxUWlra3S6KIaatMbLTcPbsQ3f7aZr9DXRclI6fwuOO03ly1p2d9/HkErQFvc4OXTRNL/69BR3wTV8C+ZPSz8/XN3DRrv/lHGM9mkzwmzW/4aH1Dw3NG+wjpjSJSonbcOJxORBCcOS0knRYJv5Cyg0DYfhYXbuaylDliM5PoRhqXL3tIKxv/wPAJinlnR22nwLcBCyRUsZ6Ol4x8rQ1RnZIV89RLsCtZ8zmyuOmEfC5Oe6aByjIcXfewZNL0E4+akxYbhPd9HW10NsIjoOQZcl/3fU6APPiJn8D7vv4PgCWzV6GQ4xMtGxMiyEFuE1nOgmqE/5CBOBLFPFi5Yu8WPki65atG5G5KRTDQV9+WUcBFwHHCyHW2P9OA34DBICX7W2/3+dZFCNGW2Nkh+nqOQ4dK/JjfIFVUXFMvq/rYqc3QK6UCAnNKSud3zD95Lh7sAOC4wBIyfbzeFKF5ItZ6eeVrZX9fTsDJqJZ6wRu04m3B0EH8Brt7yemKdtEsf/SlyiXt6WUQko5V0o5z/63XEo5TUo5scO274zEhBW90yboQrp69qH3BW8QgcBrumg1rGQh3fR163IB0oL+W/2s9KYAMcq1r3HuzHMBWFu3duDz6Sdtddxdpnufgn5EKDe9qT6u1nkU+y8qUzQLSQu66cLtHMRH7HCAL58c00ECy3LVzJx9uFzGA7DcPIwa3wEAFItWzMRY/uvQHxDwBFhbP4KCblvoDtPVvcsl10o0ul3/F/ftsVxK9bF9C3pzopmXd7zcqY2dQpEpKEHPQuK2ZSpM9+AsdABfPnmy/RxJIxf/3q6ZNmafzd3GV/hUjmP3ef8mecgyxroinN76F0K3H4LfnDyi4YFRzQqfdBqeni30Y24AoNReK6izBV1Kmb4wduShDQ9x/evX880Xv4kpzWGauUIxMJSgZyExu6kDpmefPvQ+4S8gaHYIYHIEcPR0zuBY7tTOQeKgLODFm19OnhnizMSzFCaraKjNZWvz1hFrURfWrF6ownR3b6EDHP9DuPApSg0rgeqzZisZ6omtT7D4scXURDs3ka4KW/XTt4W2saJ6xTDNXKEYGErQs5B4soOgD8blAuDLp8AOXXRJiXTl9nKARWnAC7mlOJCUCqu2S2nSh2ZqfNby2eDm1EeidnMLh+nB4+zhrkIIqDiGgClxmoLKlhp2te7iuW3PAXSZa22kmgWJBPmGwV/f/d9hnb9C0V+UoGchbV16TOkZApdLAQWmlS3qNyUOz74F/SK7YJfP7Uz7qGPSyj6dmrQuDJubNg9uTn2kzeWC2YPLpQ2XBzO3jDzDyctVf+e0p0/jo7qPAGhIWNE9eyJ7uPrVq/m4cQOTNJ0DUym2Nauyu4rMQgl6FhK3XQ3S9A6JyyXfTAFwYCoFvQj6T86aw/ZfnGY9sWu8PGKchIGDe3kMAVRFRqbtW0q3skAN09uzy8XGUTARvzQx6dx3dHd4N1JKLnv5Ml7b9RoARYaTvFQO9S5t1DJgFYruUIKehcRsy9Q0vftMLOoTvnySwrLQvxqO4PDk9HpIOhNzwqGsGnch9+lfxImJCwgaBu/vGJn66G13Kob09C7o+RP4YVMNwbojMFOF6e1VkSoaE42d4udlKkBhykPMKalqVWGOisxBCXoWEreTY0zT23Pqf1/xFXB5czNH1Y9haTSBz+vr+7FuHwdd/L/csex4zBzLWs8xnGyp3zO4OfWRt7buxi0lGj3EoXdk3Hy+EE/wfOSfODsKeriKukjn4mIpvZgpHqvM8Ds7RsZ9pFD0BSXo
WUg0FcYlJbr0MFiPC/4CxhgGp4ZcJKWHqWV5vR/TgRyPi6UHliMufYVKs5ygAV5vovcDh4CaUAivlCTZR5RLG0dfS/SiF8l3JDi4vfYcVa07qWveCkBFyorOadHKmFtgVZhcU6P86IrMQQl6FhJKtRI0TaTD0+7+GCg+qxHVEsda6mQBM8oDAzqNKKxgo3MmRYZBwmzt/YAhwO3U8EhJClfvFjqQe8ARiDFzmGpYdzhjdZ26RAPPbV8OwG9q67m9rgG36yAOLByLU0p2tu4c1vdwzavX8PTWp1n+2XIM0+j9AMXnGiXoWUhLqpUCw0QX7t537g07Pb5ARLnf+CLT+mmhd+TURTMplRopGR78vPqA06Fbgi7dVk/RvlAwmWmGlZh1cUsrQd3BSzXv4ZCS8brOqdEY46YejMefz3jdoCE5PAu8L1a+yC3v3MKru17llndv4aa3buLcf57Lz1b8bFheL5MwTIPrX7+eNXVrRnsq+x1K0LMIU5o8suERquMN5JsGpsPT+0G9MflIWHITz/nP4u/GkgFb6ADCX0iJkUInPCLRIU5Hu4Xeq8uljcLJzEo1ATBB1znVbqKdZ1qLulHp5YDZC8GTx2RNI2x3ZRpqvv/G93n606c7bdvSvIW/bPnLsLxeJlEfr+flHS9z0QsX8aeNf1KRRP1ACXoWsaJ6BXesuoMNkZ3kGyamYwgsdLcfjvsv1sy+ifKCACUdOxr1F18+RYYBwkxncQ4nIi3objx9TbAqrGBxIsYdtfVMTeZzTMKaZ6udmBTKP5DjDxoH3iCTNY2YrB1xwYlrXUsSZBMdK17e/sHtvLfnvS4Zu4ruUYKeRZi01xbJN4dI0G1uPGkmz1x11OBO4i+g0M46bUm0DMGs9o1Ex9vmQ3f38ateUIEATo7F+Yt+AnPj7WUKPvPOYtw3HrCyb70BJms6pkgNS4XGfG9+p+eFKS8HJq18gOpw45C/XibRmuq8xnL5y5dz4hMnAvBm1Zsc/MjBNMQbhuS1NjdtZunflvZ4wWhNtaardu4PKEHPIpJGMv24YIgF3e9xDs46B/DlU2jXTKmPDb8oGTI1AAt9cvrhS9pcdjhm8e2WED+vbeH+mX+EkmnWoNdyuQDsaN0x1FOnPKe80/N9Pmu7AAAgAElEQVSTEg1c3mKVUNjRUjfkr5dJhFPd372ljBR/3vRnAD6u/3hIXmtF9Qrq4nWsqe/eX3/U40dxzF+OGZLXGgmUoGcRHX8I+YaJKYbAhz6U+PIpsKsajoSVaUgdj4SUdPW9pk1hBY3FC7kh9R0+kRNhwiKuaQ5xWNRh1adpwxtguh3GuKFh6PuRtlVyNDVrzWJ6Skv/7XaFhsY6zVTayh7vzbaWbeR5rEX5obLQP2n+JH3untClPiSvNRIoQc8iOgm6aWI6h85CHxJ8BeTaQtUYG34fuoHlQ0/Sj7+D082W0/7Gk6bVOLtw2mIAykQLp8we076fJ0CJaeJN5bKq5sOhnDZg+ZEFTi5rdnD/7ibOiJkU2mGLe8LZLejhHtxxm5s2E0lY358tTVuHJIyzL4IOHeoCZThK0LOIzha6Ac7Ms9Bz7FK8TfEREHTZ5kPv34Vt3sQCzjhkHLPHBSmdvji9/aBxwfadvJblXBAv5OOGtX1aGI1psbSA9LqvHqM4NpdroutYnFuO/4o3CTqsLN26WFM/3s3+R2vUysydlOpsGW9q2sSOFmvsb5/8hUv+dcmgXkczNT4LWdU0uxP0jp/pSLZOHAxK0LOIjotJHiCQ27dStyOGvwC/bFsUHf6FJgMjHbbYH3I8Lu45bz7PX30MOWOmWxvF3v1WrVv/w5OtNCcb2R3Z3et5H9nwCF9+9su8s/udXveNaTFKE5av3HHxs4iS6QSDExASGmPZ3S0pnGzGLSX/3F2NmSpOb19Xt4VWrf1i9mHdh4OKMKoO70YzNcr8pexs3YlmdK7T33FNanto+4BfZyRRgp5FdLTQ80yTgkCGCbo3CGMOAyAZ
Gf4wNENYiUXLrz9h4CcRAr7xLFy5svN220K/SNsCwAtbe292Eaqx2u/d/sHt+9xPN3VSZorpsp5IwSzInwCAp7CCXBNaUsMfITSaRJIhAqaJAKRufYdLkl42120kZjTjNtuzn6sjA88DaKy1FlZnN+5Blzo1sc7fyY6/JyXoihGnNdXK1Pyp3FVyDIsSSYqCA8/qHBaEIHHMj3BJiREZ/oqLJiZeKcn1914hcp9MXQIl0ztv8+Qhj7yaaSkNvylZUfVBr6eR298AYGfrjn12bWprfTeVBpxTO0RY5E+gyDAIp7LdQm8lYIe3LpFWrZxTE/XorgQ6Ub7UDA9XW66XVdUDL47WbBsVC2z3394Xh453vI3x/SNUVAn6fk4oGUo3YwinwhT5iji2qZbPzLGUBDPMQge8ufnkmCZxY/gLdBnCwC0luAYZbtkdQiBO+gl86fcckkywvaX3hdGYXXpYlwY1+7hDaUusyZca/imHtw/klVNsasT07LbQW1PhtKCXmymElCxKtLs/DjSbmGaHjH5QPfAIoyY7f2B2yorv39i4sZOIh+Pt7p2G2P7xN1eCvp9zz6s38M0XvkEo0UI4FSbgzkXseIe3zTmdw+wyBL/fj19KEuZICLqJR5J2jwwHzjEHMSeZosGs7TXqIu5u/zx2hXu+Q4nplqDnmBImti/KklfGGN0gKbO7BntEi5Fnmpyb/CFfiOh8ozWcDhEFmKKnyDdNArqDjQ2DsNATltXtiZcgpOTO1Xdy2lOnpcdbW606PQ4p+XDn8BZhGyp6FXQhxEQhxGtCiE1CiA1CiGvs7UVCiJeFEFvt/wt7O5diaJFS8kb9Ggzg/cqXLUHXkjj0OO+YcyjLQEH3eH3kmJKE3QVpuDCliSkkTtyWH3y4KKygxDCQQnbJcNybuMNFsW6J/ju73+PIx4/stgBVm6BLmQP5E9sHcssYp+vozhApff+Jje6OylAlb+x6o9uxsB4lYEqqKSJgHsiNTS18lFzI11rinFOXz8Fxg/eMg1iYSLE19iZL/7aUVTWr+j2H5kQLOabJC3q7WyuUDLG23lrrCNsL3eN1HWOEKoQOlr5Y6Dpwg5TyQOBw4EohxEHAD4B/SymnA/+2nytGkE+aP6FGWreiK3a+RjgZImhHQKw0D8xIC104veRIk6SZ7H3nQZC02885hqJA2b7w5ZODHU4Y3befNS41JusaLhMe3fQw4VSYV3e92mW/NpdLwlnc+WKUV8443QAh2Vg/Mm38hosrXrmCq169ilAy1GWsVY8TNE2Sws9rZRfxM+18tpoT+WFzPbdF1/Eb/WxeNhdysu0GqYvXcdeHd/U74qUpFaLQMHnDPATZ4e/8yo5XrHnYjU3G6zoO5/5RP6dXQZdS7pFSfmg/DgObgPHAl4BH7N0eAc4arkkquiKl5Nerf41fwtxEkpf3rCaix8jfuZJPzXGEyMtICx2XB78pScqeFwWHgkjKcuk4Hf3osDRAfG6rGXZvGZxxUyfHlEzU210zboe7S62QtkVR3VXa+QR5pYy3LfP1tftH1EVPtGXCtvVpbcMwDZqMOEWGgfTk0lR4CPcbpxNxtTsAvnD25Uw+ZAlHx63POMfhY239Wt6u/Fe/5tCsRSgyDLbJccyOW5/JuNxxNMYbietxPmjaAMB43SDFyDRlGSz98qELISqA+cBKoFxKuQcs0QfKejjmMiHEKiHEqvr67Pb9jSQb6j7mnep3uKqpmWNjcZqxrLpZSY11cgozyvMozMmwxCIAp5ccKUkxvIIeTlo/QMcICHqufywAu1v3LegxaeCTkh/WRfhOnWVNPrT+IU568iQSertgxJJW1IXh6VzPpc3lArC1cf/w6fbExIDlSvr3jn932t6cbMZEUqKbON05zBlvFSlLeIrS+yxesIBl55xJgWny0s7dvP7ZVsZrOvd+9Nt+zaFZj1JgSsqLgpxVPZkX9sQo9BXSnGzm4Q0P83Kr1amqyDBIiVT6IpTJ9FnQhRB5
wJPAtVLKPjuUpJT3SSkXSSkXlZaW9n6Aok+s3WjVxT45GuPgZLv7Yk4yyYKFh/PSdUtwDLr/3DDgdOM3TVIMrw+4zUJ3Of3D+joAwfwpANSE9100K45B2Czg4egyrozuIjeVi2ZqhFNhamPtfUtjYSt8TuZM6HwCt48xDitS5pOWLft1nfC28skbGztHqdTHLKOvwHSQ43NxwWGT+NO3FvPNkzssDjucVuTSMTcw1jDwS8n5rWHWhyupCvfdFVWjxwgYgomFOVQ6ZjAh0UCBK4eWRAu10fbPw2c4QOwf6f99EnQhhBtLzB+TUj5lb64VQoy1x8cC2V0CLsNYV/cRZbpOuWEwJSEQUjJO03lfW0R4/ndGe3o9IwQ+KUgxvO3UIraF7nINMga9DxTaMerNocp97hfHxGE4OXDphYTzZzLRaHe1dBSQUMjOOs2Z1OUc/rwyxqZcrI88x29W3zX4yY8SbUk7dfE6bn33Virtv11bKeKg6SbX40IIwTHTS5kxZUrXkyy9BU67A4Cj45ab6t3q93p97ZgW45J/XUKL1Mg3nAR9bpoCswAI7F5HY6SGUDKEVwpurnLjNq073e78/ZlGX6JcBPAAsElKeWeHoWeBZfbjZcAzQz89RU+si1VzsF0f+1PjAA5OpjgikeAfxlEE8jIv/rwjPilIiWEWdM0WdPfw/y0KxhyI3zSJ9JL+Hwcc0kV+jhdx5FWMM9sX2jpmKVaFtpNjmnjyp3c9SXA8f6oLcWY4wn0bHuTlT4e+MNhIEE6FmRSwLlhPbn2S+z6+D2ivophjeMj1dii3kGe7n+Z8ufOJFl7My5OuY4qmk6O7eXPXu72+9r3v3MrqmlVc5SznlFYXQb8LrWwOAKXROprjDdTFajkkmUKLT8EnLLddfTTzY9H7YqEfBVwEHC+EWGP/Ow34JXCiEGIrcKL9XDECxGKN7HRIWuLTuXT804SC03mgpo6bG5qI4iPoy7Aqi3vhtQV9OFwGD65/kAuWX8AL258EwOUevhj0NvLGTqfANHkj+T4PrHug05iUkktfupQXK18kKcBhugn63eTOPSPd7AM6W+iVkd1M1nQchRPpwuSjKNcaubrZshb/sfH1YXlPw4mUknAqzMLyheltL+94mUgqkhZ0v+El19OhBo83D658H876XeeTOd0c8tUfEBL5VMS8rO2hrnlHKjc9xYxkkrNbHKD7CfjcHDzFcm8VGiYJB3zaVMkYLclGOZkcp2UU1EQyP0O3L1Eub0sphZRyrpRynv1vuZSyUUq5VEo53f4/u0vAZRC76qwaFEaqlKtPXchpC2fgkxI3EJdeAr7+FaMaaXzSgSnYZ/r7QHls42N8XP8xr1RbN4xuz/CXP3AGx9HksKzJuz7s7AZpijezYs8K/rj6WWuD6Sbf70b4C/EY7RZoTWv7IudOrZlyTZCX04276IDjACg3DEp1ndrm3l0MmUZcj2NIgyLPeABc+EgYCVbVrqI+Vk+eFEQ0Nznevb7HpTO7zfotC/jwFY1jWkrSkmqgKbFvKQo7BEHTxNn0KSHTMoCWHljGScn/psBODouZYcoMg43mZIIeq8rmAxt+32PzjUxBZYruh+xqsgpCpVKlVJTkgK+9rKvpzu17M4dRwoslZB17Rw4FzYlm6uJ1XLPgGg7LPwKAoK9kSF+jWxwOkh0WoHVTZ2PjRqJalI9qrEiJTY1WTRKkh3y/dQcVcba7g2rtrkcJPUGtTBFM+QnsLWgA4xaAv4jQxKXMTqbYwmY+rN2/3C5tCVj+d//A/K1n0fLJjYBVAKs+Vke+JolJH7ke575O0wlXcAxnalYp3M2N+84ebXU4CJomxWYjUfwE/S4mF+fiKD+ImN4eTVOim2yREynNsbZtafmY85/71pDUYR8uMvuXr0ijmzrrG9YDsKvFikF2OycT8Lk7pbYLb4YV5OoGr13Oti3euq/URGu46t9X9diMYEuzdaGbXTSbI/VTeWL3Ho4cP3twk+0jVzTl
M8lOfr3v4/s495/ncvifD+e6t74NgMNjuVRMo13Qo652Qd/WVIVu6lz72rVIAe5UvvXZ7o3TBZe9Tv4FDzNLs1w2y15cNuQXx+GkzcqtSFbzK+ejYOQQdBfxYd2HrNj1OnNTURJ4yPH0/U7T5XIzyy4PsKlp0z73bXW607ViwtKf/jwevWQxJe4Od0VakARexuaW8duaOr7Z0kplZBOb6oe/sNxAUYK+n/Dg+gc57/nzWFe/jl2RKgoMg0BRhTXYQdDjDH/c9WDJsWuLN8X7nk5dH6vnvOfP442qN3iz6s0u42vq1nDpS5da+z76E7627jvMTGnkBIu67DscVHgO4/b6PQDcv+5+AHI7+MgR1mMpvWkBORxrUfDkSJTdej3Pb3uFd6qtWumu+NieXWeFk8EX5ExZwpyIda53q9/tsiZhSpP/WfU/bG3eOjRvcohoE/SAKSkXLRyf8xlGspTXd71OXBpc3hJipthFc6wf5SHyJ5JvmpTreq/dh8KCtKBH8bFwspW0VBb0Uexpv8gmUmO59YyDcOcW8oV4goUJa6F9VyhzA/qUoO8nfNpi3bKvb1zPrng9EzWd4tJx1qC33eXSamRgMtFe5ApLhBrjfQ8Du/ntm9NxwHWx9h/UHR/cwYo9K3jjU8tHXaYbnJl8hzyRYGfBYhi/sNvzDTVLT/0yB2g6DulAN3UOzj+OO+q6JhoZpi8t6Ic481i3fSeXtbQiBfzi/Z/iQPDOjl2EtAnk9bIWMn7MLH5cb93lXPf6ddz01k2dRH1byzYe3vAwN7998xC+08HTJuhr9NnoDh93u38DYcuNcWI0xgGazi/086lt7Ud25om3sTL3OAKmSWwf8eKaqRF3CFJGgH8Zi/i3uYDJxe0iPtmdz0VNKR7YU0sweAgXHzUFv20UlNg9XXeHa7s9dyagBH0/wWlbtatqVrExWc94TTKhzLY+bQvdkIKImdkLogB5dvPq/pQk3dqylVPzZzEtMDldtzqux3lk4yNc+tKltFS+ic80+Wu1ZSXHPYWUf/d58IxMCKd38qEIvJzWbPUd3bJ1EnOSXS3MHE9eeo2jZtIZAFQYLiYmnUT1EGW6RtCUUDCJol4yfR0lBzCdas5qtr4bL2x/gfmPzuPTZuvi35vrYbRotWuL1+jjeP+Ie8lN1jHNdr+d1xrhW6kbKFp0Dree2Q93mTdA3qzjcEvYWd9zxm7bxaTFMZnLtespmt25+YnXH+A/QjUsTiRpCswEwJ1jWfDFhnXRqYlkbsa7EvT9hDYRe2nHS7RKneNCTqaU2GJlW+hRfOiZn51MwGFFKvTVQjdMg6Z4IyWfvs7Y1hqqo9bfYne4vSFBg0MwQdcpMUzC0o/v4LPwekbwbsXpZndwHt9s3sXE5p/x1dCnFJhdP4zCnHYX0JHHn85jp36Ma+bJ/DRsZc6eFY7yT+Nwzv7S13rP9J1qRbz8pGU7j8fncWVzCwYmj6+z0unX11u1SHQjs37mra2WD7rJKMU9/ThkTjHntmj8rkGyMJmkwT+VX5wzlxnl/Qs5PWjaVNxSEo713N4wbPdj9Thy+PCHJ3LX1+d1Gvf629eg9DwrCgefVX6gTdBrY5nbpDvzzTkFALsju3E5XAQ9QeaHY5Qk/RSmBd364sfwoRmZr+j5LsvP3xjrm6C31fcoNQxC4QbWetw0J5p5bUd7b85VqQZmGyZ362eRe+hFfOukw4Zl7vuiqfQwFoVWcHLD01zjshKqXwj7iESSfOqsp9gw2DS2Pbbc53ZywWGTITGbBZv+wcWx87nC+DNHahfwyuTinl6mnalLaL1uO+47ZzKn5lnmAH8O5PNh7ToA1tjhrVsbM2sRr7ZlO24pqTfGMiboQwTHc0ZsFdgRgb7SbrJC+4DILcMtJUL27KoJ2yUWPI48inK7XvBzctsvIu6AffG1Bd0NBAxoSmSuoGfWpVvRLSkjRV2sjsuKD+WNZoMfNSRolPlMKrZX5G1Bj0ofB5RmfpRL0I4k
aIyFkFJy/evX8/but3vcv62+RysTGa/rtGoRzn7mbO5e+6v0PhGp4zfdfDz9Kr71pROGtalFT2iTjgbgHMdb6W0TLvkXyxf8ndOjMY5IJCkuKOh64FjLSvyS80M06cbIHUNedyGL3RDML+Ix55nsMMvYaZZySDLOnpgV7bMjXGnt5Grt1PB4tKmNVFOu61TLEsqCXgiOT499Mfkzrj1x1sBOnFuCGwmy58XUVvu75HUGux0XHVx0/rw2QW/ft8TQaUlmbsqNEvT9gN2R3UgkJdWboGYdhYmd1Hon43XZcbq2eJUWFfLoJYv3cabMIMfjw2tKWpKtRLQIL+94mSteuaLH/eujVlr8zvh4xmrWbW9jor32uDStRcYcw9VnIRwOciYvICz9THTYPtalt0BOEUG/m5WmJVJjA92EIo6zBH1aajM7ZRmTS/p3UT716t/guHYtxef/gYNSKaKyhrpYHTE9wgzbj7+1KXOs9JpEI+W6ge4vt77D+Zagv2PMJq9iIUcc0Ie7k+7ILcUjQbIvQbe+N153NxdWALtNoCadBAK2UeBst+TLDJ2olrn9RZWgZygbGjfw6IZHAfjL5r/gwMGCPZ+kx5sKDm7f2eEEdy7BYAFlwcwPW3R7fOSZ0JpspSXZ+8JoQ8hKuolpJZQmu74/mbIWrXy6u9fIkOFkbFEe79vCzUFnwTE3ABD0ubk6dRV/179AcPqRXQ/MK4OAFbFUIws7RV30hfEFfiYW5ZA75TAOTOog4MXPrCYNbb0439i2YYDvauip1cKUGIKiAtvyDVjlh6tlMUtmDqIiqzeAUwrkPkozh+0sUp+7hwZr9t1jChfOtjWMsoNYN+cH/D/tmxQbBglDLYoq+sm3/vUtfrXqVzTEG/j7J3/nEOccDujQWHnREcd3PsAXHLGIjsHi8vjIM00iWphQpD0ErKd60/V2WnxUL2FawfhOYwHDZJJdC96te8jzjl4dm5JcL2td9oW2tN1tEPC5qKWI7+vfYdq4HgTryO8BsF5OoaJ4gBUivXlM8Vk++ue2vgDACbEYLhNW7Fk5sHMOMaY0qTUTBA0PY9qMD6edaIWPI6YO0DoHu7uTC7mP0swhW9BzfD28TlrQ3RTnedLnLTvpOpITj2ZWKkVUhNgT2TPweQ4jStAzlDaf5xu73kAzNQ5pbSAqvWwyJxFyl/KFhQd3PiA4HoJjR2Gm/Ue4PARMSUyPEIq2/zC6q2VtSpNtoc8IGgYtZiH+4or02A9DuTxbVc00w/qR6qZ7VOvYOByC679zBSDSbhSAoB13LgQ9l2U44rtEL1vJq2XLOHr6wMsVjK84klLdYHOrVaSqQtM4KOFka3j1gM85lDQlmtCReDUvY/JtQS+zwhMPOeZ05k8aXGtiBy4MuhoGb1W9RUO8gc2tOyjTdXw5PQi6bRTl5fg58oD2z6E86ONXl53NMTHrYtFd68BMQAl6hlLit75Mb1RZjXTnhzfwD+NobtYuYdXc27oecP7f4OSfj+QUB47TS75pkDCjtETak4Q2NHZ1Czy84WFeqF/NgSmNqLsAd9FEvh6yEkdOb95MWI7jsIT13Gn4+1X/Y1goPwiu/hBmnJLe5HFZP7NpvSxY546bxd+vPnFQouaqOILZdsMTl5TE5ViOTiaJympqojW9HD38tDXyEFpuu4U+/QS4eg3zT162jyP7hhMHBp0zZjVT47v//i4XPH8Ba8KVLEgkcfu7XxRts9Ddnm4aoziceJ2TGJ9y8Pxnz2dkgxEl6BlKW0uyNkEfayR4yjiaD+UMggef2vWA3OJRiewYEC4PQdMgZUZpibf7I1/f9TrPbnuWu1bfhW7qSCl58pMnWeAfy+9q6vAGyxH5E/hBUyMvVDbwjr6A/0hcwgWtYf5UXUNprIS8TCgdXDS1U3Pn6WV55Hqc/OiMEagrc8BxLI3ZPUmFYFvOPA5NWOsUGxtGP9Go0U4qMrRgu4UOUDSwUMW9ceFEF52FtsV+/9XRamr1MPOS
SVw9Crot5K7ucxjqc2dwXmsr6xrWsbImM9xYHVGCnoEk9ER6sbDNr5ynO/hYHgDAjLL9RLh7wrbQdRkjZNe/PjMcYfn25dz89s08sP4B7vnoHtbWr2VneCdn+SbgwEF+USkUTMIJTJAx3nctYNaCoxHAvGSKiPSPapRLTxTkeNjw41MG5UrpM3llnOCZzFdaw5zbEiecP4MDk5bAf7Bn/fC/fi+EbEGPGfmdBX2IcOLE2EvQ9y6nuyiRxJfbg6C3rUM5u2+wvnXMF/l6pAmfdPPwmszr6ZN53/7POdWRalKGFXY1Nncse2wf8069gnMPn0p5wEd+TgZYoYPB6WaypmM6ovxu178IGCZnRaIsDxYwITABzYCV1avZ3LSZfG8+xxo+mmUecyYWwbR54HCBqfODq67EUVSB/CQHocWI4M/4WvAjgW/BefzolR/ypnEw406YRe5uiSsV5OO60bfQW+3s3ohRzNjhEHTh7LIk2lHQg9JJXjKIz99DAIHD/v70YKGHyg5lx7rxFGoGnzaOvgtrb5SFnkE8sO4BTn7yZH668qcAHF7WXlhqrTmTLx48ju8t7aYt2f6Gy8u5rWGcYWthN0eaHJpI8pA4gX9sWUfxngY2NK3l3ep3uWTqWQQ2PsNGczKLJheC2wfXroevPoyzeApCCEReGQARMtNCH2lcR1/N9yY8wS+CP+SAmVZrtZKkn4+b3+Kjuo9GdW6tdmemkFFE+TCE2DqEC2OvignNCavT0NTc8dxTG2eNeQC5PX1PTPty0EPrwjyfhypZSpFuEjMyryWdEvQM4o/r/gjASjvE7PAP/5Ye+8ScwMSi4e9gPyI4vbiBYMhq+1Xrsn5cFTuewplo5hCjPXHj0I+XY5iSm/VvM3+SnQwSHAuzz24/n91vMiz9Pf9QP2fcdckJPHv9iYjy2VA0lbMSVqnin64Y3YXz1ngjuaZJwl3cfb33QeLEiSno1ISizUJ/ZONKFsRrWGNO69yvtCPjF8Gib8HZv+t2OOBzEcFPsWGQlJnXNFoJeobQmrKyJhc7LN/esdEYx4baha1KlLdHBezv2HHHzxj3d9pcoFv+9KkdmjVM3/0hD2knUTR+Ws8CYFvobgzlcrFxOgRup8NanJ17LldEPiEYOoDPGke3lndrsoWgaeLOG5469S6H9R1Jme3Zok2JJpxYbecA1poHdO5X2hGnC06/EwomdTuc53MRkT5KTR2NvtfzHymUoGcIe2zf4tf2bOPa2incVddAjh0WNSuZIhmclPGt5fqMHcFTaJr4TMkJkSTNsj2kb5LR/mPcbYzhd6lT+fLCCT2fb+YXAWh0laVrjSs6sPgywjkTOZENaLLnWuEjQWsqTNAw8QcHkRG6D1z2smCqw3eoOdlMgXBh4uLq1JW8L2cN+E4u4HURIYcyI4UUyX533RpuskQhRg4p5bDEn1bbNazH6Qbfir1BXFrW+Bs7qvhDTYi8gv0jaahPzDqdloMuIiJ9rNyxixvqDBr9UwHQgpMYp7cva3099f8wvAWceci4ns837zz0763lzhsuxece5Tj0TCSniOBZd1BuJhDOZL+ac6/cs3JIm3m3alGCpkmgYBAZofvAaVvoSb1d0BvjTeQkNFYYM3nWPAoQPbtceiHgcxORfsbaF4y2MMxMQQl6P3low0PMfXRuJwtgoMT1OL9b+zuiWpRqu03YWFvMtsiJ7DDLKDJNavQyJg40HTwTKZiIOOPXPGsciQOISS+BSdbinXvSYsp1y/95bDROoHgcb/3HcRT00uzBVVzB2PwsWWMYBsQBx+OXViheXzvX/2vbW3z7pW/z87d/P2TzCOkxckxBWcHwlKlwOqzvSUxrvwjVt+5hjJlgjZzGqXPG8IeLFrYXtusneT4XUXzp2ugN8cwqpduroAshHhRC1Akh1nfYNk8IsUIIsUYIsUoIkfkl/nphQ+MGznn2HP7rrf/a536/Xv1rwKqAOFhe2fEK9665l+e2Pcee1h14TZMc
w7oV3CVLaSizOtfvkUX9LtiU6QR9LlY5LBGfJOoITrRLGUxcjBt4YmcjNzYIxhflUthN3WpFP3G60WtDKk8AACAASURBVL1WnZdQsm+Lee9V2fXUm3YO2TRajSRew8mYYbr4tvnQY5qVLasZGjtDWxmjm/xZX0ppwMvJs8cM+PwBn4swfortvgO10f3PQn8YOGWvbbcDt0kp5wG32M/3W0xp8qN3fsTW5q38e+e/e9xPM9qv+t3VHekvbQ2BX935KtWRasbqBivMgwCokcVUHfFj3pj9U3YvuokLD5886NfLJIQQVObOB8AnNPzzvgyHfxemWS3BZhpRavVCximre8jwOyyjoCXRN0H/tMWq7hnXBn83CvCbD++mHg236Rq2BX6X3Q0rnozx9NanWfB/C2hFJ6d1KmHfGL5xRMWgzp/rcRGR/rSFvr0lc8oSQx8Si6SUbwohKvbeDLSlWuUD1ezHvFv9Lluat1DsKyamx3rc758fP5B+XNWyDSYcM6DXq4vVsT20nXd3v4tDOPig5gOKHB5maxrbxXhgDU4MygoCHPHV7w3oNfYHkv5StJiTNXnHcGigHE75BWjti0w1sohxBUrQh4pcl7Xw3BBr3ud+O1p30JxopjKyGYDm1MATaGqjtdy/7n5qo7W8XvW6tdH0MHdC/oDPuS9cTjfoEE9FeGB9+++1JrKQ5Tcew8SiwbkunQ6B7sql3DDwJAv4147lfGf+xYOc9dAxUB/6tcCvhBC7gDuA/xy6KY08O1qtetunlC4krsc7WeJtNMYbue3je5mfSOCQkl0t2wb8ere+eyvffunbRLUoZwWOR5c6dUaM+UmdV30nA/C8cTgzyjO/+9Bg+NqiiXx3ynJKL36sfaO7XcCrZTFjC7IkVDMDyHVbNlh9h+bcUS3axQVz/vPnc9EL/7+9Mw+Pozjz8FtzaWYkWfct2/IhydjGt8EcBoMNGDAxLA7HLgEWsgQSEmA32QA5IGyy5Ngc7LOBBBLCEsKZkARIWEISJ5wxsQ0GjI0tW7Kt+z7mnu6u/aNbl62xjKyZkUb1Po8e9VR3q3+qmf5N9VdVX32CHs1sp/VpY8v/3Rnq5BMvfoKnPnxq0MyBbE9GXCYVATjs5t9t6DnEgd4DbEqfza/qm9hqzDdXRxoHpCsTARR2z6GmZxd7uvaMek6iGKuh3wTcJqWcDtwG/DTWgUKIG6w4+9a2tomZGL410IpDwoy3nwSgJ3LkI2l7sB0dydU9fcyORqnvHXtcsT3Yjk3Y+F3lddy142FcmvlBqwy7CWbPhbt7+M29t5CXMT4fwInKNadW8NC1q6iIkYXwl/oZH3mhYEVs0q1VejqtuG9IC3Hb5tu4+sWrB3IG7e3aS2/EHF99cjDEhi5BSHagGbFzjMfi9X0v0uRv4tut5rn5ms7jDc3cXBS/JGVOKwfLtva3ALhiz+vk2ooJeYvH3BF6ONJKglcYML8ga3tqx+XvjgdjNfRrgGet7WeAmJ2iUsoHpZQrpJQrCgriM/Z0NLpD3QPTf0eiNdBKoaaRZU086P9AD6V/vKlXSsqjGof8Y09wH9JDnDPzHIoObcUGzOzLI9sATziHwkzVIm059wGuitxBjSxnXrEy9PEi021O5ukOtPHAjgdY+YuVvNn0Jvt79vNq/RsAPL/veezYePlgAw82tzJf6wNhjCn17p4Dm3EZkmU+Hw/WGTzT0MSJkQgZYvRzx4rTWoC80W/2cc2Iany/8D8pzBy/xpHdytSYo5v2ua/z+AdIjBdjNfRG4Exr+2xg7/jIGX+2NG1h9VOr+dyfPxfzmNZAK4W6PjCTrDd8pKEHrNmLXrubGZpGrb91oFXjj/rZ2rz1mDUFogE6eqG7zlyE4BtdB/h1fT2/iKxh3fyiY/47qYrtxEt5zTBHvahx5eOHw5NFhmGwvf0t7n/nfhy4yTYkGbrk3lceQzd0Xqj5Lav9Pop1HRuwKGx+7t9pe+eYr9MT7qEv0seenlpmRzXO
Ez9m0e07yL/sMfOA3vh1uTmtkEtTsIkcaUMrWM4v9trHNcRz4zmLAcgxokjDya7W4x8gMV4cy7DFJ4A3gWohRL0Q4nrgX4DvCiF2AP8J3BBfmWNn86HNwNE/kK2BFgo1bdDQR2ih+zVzhp3fNZ1Z0Si60Gn0mR/MR3c+ynUvXXfMrZhA1EfvvnfJ8e8H4ATRhKFNI2PF5Ww62ozIKYKavh8f7O5M8jWdvf4ahHRyyt6zePFgPacFAzSGdrL50Gbawp2s94V5Ludafqadx4nREHbNzWv1rx/zddY+s5bVT65mb6ST4oiT8xaWmzMzy1aYBwzNwzPOOK0+mNZIO8W6zjs+M8x04YnjNzFv6Vxz+OddzsdwRtNpCQ4uo1jTVTOuE7E+KscyyuXKGLuWxyifUPgivoFtf9RP+pAsam+3vk2WK4vWQCunD22hj2DovrBp6HuCJSwyzE6QnW01lGeW81bzW0gk21q2ceHsC4+qR0pJIOrnTNHIh0Y5uflFFHRu4+9GFctnFR73/5sKuJ12blozh3PV08q44vRkcUNPD3cW5JPhL+RW2wtkSMnScJiXMrq567UvUxmJUONbxz9/7r/YvW8/8pd/oirg4rWG14jqUZz20VMr9C+f2AZ4wtO4qH+Wb2YRfKXDzJcSJ1xDOtWLw0E+COXy8eXlXHHSyLlZxsQQ/UW6RmfIzI/T5m/nkucuIdM5Da/Tw3MXP4cv0MEnfnU+fTKTTy39D65Ztm78dIxAys8U9UcHc1cc3oK++sWr2fjbjQS0oBlysSYLdI0wTre+zYyT7fHnUhwxg4Dvte4lokd4t9Vs/W9r/vuoeiJGBB3wGpK/nPRjCgrNlsN2o5IVFfFJWDQZ+eL6ece9vqRiOE5vJht8Af6ty8aznW9RZWvgSW0NS0OmAYe0IPe2dvKX7MuZ5nZy0oJqGssv5Ab/AXoi3Tyx++lRrnAknr5ZLOnPkglxNXMAl3NwWGKJprM7lMe8khiLWRwPy68FIE+30RLZzfP7nufPtWZq4r5oLy2BFh7f/Tif+dNNNNoF86Pt5Hnik+5gKClv6L7oYAu9rjt2rKtIG2yht/qO7ED1+80ROl16Hm57Fh7dRk3Xfj7o+ICI1PAaBtuPwdD7Y/H79ZkYGSWD+ZeLT6RMjblWxBGXdxoCuLa7jj2R+Rwqv4i6JZ+nMqJRGMzg8yEvhlFB4ZDBC3kXfJmlMovlwRD/vfX+gc9vLPqXTgRYGQzR5T45oTnqh7bQSzSNg7KQE+LRsX7RfVCyhFzMSVd3vnYn2xp3DDvkvu33sbvvAPe2tvOT5lZWlVWPv47DSHlD90f96CHz0X13++BQw/7Hwn4qjSzsUuAwbMPG6Q78nVAXdinplZm4M/Mojtg40LePut46AM4IBKntqx9xDPtQBiYu6S48Tps5mebkG/nKpz95HP+lQjE6aRmDLeVnbOspv/7n3L5pNTJ7Nt9olFzevIe/Rk6gYkiaCW9JFXmX/5Bbu7oJ08uPd/zkqNfoH012k+bh9lYXkdJT4vPPxCBtyOLOxbrOXllGdbxGSmWVsywy+AX2XutmvIbBHN9g/VV1F7LBb97zeX0fxkfHEFLe0H1RHzlRMxfIE3t+OpBM5/BhjO9HF9NDOh7dRmdweMhlV8cu9gfq8BqSey47HU9WAbMjkpZQ7UDH6KpgCAODQ76jTwXuDwFJ6cLjspsLCp//rYEc4QpFvHCnD87ONHLM1Z4A7JVrWWXbhU1q/EU/kYr8w/IGlS5jcTjK7N48Ht75EJsPbo55jc6wuZjEvI6D/DR8PieUxmdGaCzcrkHzztfyyM0rjN98juwZXNN7kO+2mE/v9fpBVobCfKelGynN0VkVWgQNB685Vg3UdzxJeUP3R/ysYxczo1F6oh38/IOfA8MN/dJeHy8HqsCTQ65mY3f32zy1+6mBrHSXvXAZ74b345EG7sw8bN4clkV8RGWILY1vkK/pVEXMlvnu9qPPIO1/ZDUMN55YSfYV
ijjg8g7Gkr2Fcwa27dWDqZq2GPOYeXhmT/c0jPwqnu58G2col7tev5dDvYf43f7fHXGN/vvK5pnJ0/qZnDY3/nHjYVLTBr+MaoOzWXtCHDvWs8qtoZ2DuW4WhcJU2pqwRc3JcidEe/mbXs1Py/4DSpfET4tFyhu6L+oj3TB4oLkNB2ns6jAXyu0Kmx+8+4rO5ssdnRyUhXiy8lnX7aAr0sLXt3ydP9T9YdgQJK8hcU/Lh8VXssQaPbO9bQelmkaFla5zZ9sohm5Ns9Z1Nx41xlqRQMSQp8CZxUOMtuJ0yK+i/vRvctrcQhaVZx9xrv2EDaRJuLmrj65IExf8+gJuf/V2Wvwtw47rCpot9F2+chaV57B8ZmI7tt1pg7OOt+mVnD0vjiPH3GY95WnGQNECPRtpc1Ctm/f5aq2RHXLOceeQOVZS2tB1QyegBcgwJNM1jbmyil2du5BSDrQkKrY9hgMIeUpwZeZxQaAHo9vMAtjgaxgIqYA5SzR9Wg5Ur8eevhKHtdBFsaaRKSVezcGujt1H1RS0rhs1vMrQFUlj0/Lpgy8caXDz3ylfdxOPfTJGJ+bar8Lpt3FNqI7KnhMHin9f+3tqump44J0HaAu00dlnjgbr1or56kULEhJmGIo7LYPqcISvtXWw3aiMX/wcoOo8gnMv5IGybw0UeYsugMsfY7ZuhlYzNRcPahuYdXgYK06ktKH3d0CmW6NXTu3cRXe4m2Z/M10BM+6VqxtEcJJbUIqt8lyqbE38qH0fRiSHBl/DQOIuACkFbitMUlRexSkB81HLq7voSitjbtDJ39v/xJO7n4ypyR80DV2THjyulK5+xQSky1XCnqILKM4aw8zJitXY0Xm283e8VWf2FX1v2/e45LlLuH/H/dz9yn20dNXikJKFVYsT3joHcHq8/LKxmXP6DGpFObmjLIxyXKTn47nqcS6/5FK+0dbBPW0dOKavRFSfTyR8Kqv6HLx0wg/48qZT+aeTE5P+OqUdpb8DMsMy9HUhc9ji/9W+TGfPQexSMs0waJeZzC7MgJM/xd3Rq1ltf5+cqIPa7kPDDL3PZh9oceSVVXJ+wIyxN8scPDnFPNL+IUWGiy1NW2JqCoTNETRh3YvHqWLoisSSc+duqm56Ymwnz1gFgCZtuAxIsyINK4MhMnWDV5teZkvzO+TqOiWz5o+T4o+Gy2V+Ub1tzCU/04PNFv8nhNzcXM7og0t8fqbNMdNaNWfczMv1X+e88zbw8RXTcTkSY7Upbej9s0TTpeSPlV9lYSTCfL+dH2y/j9quPWQZBjbAiTawxNk/fOpugp5iqjUfh/rqqRuSJtdnG6wukTOT830Bbunsplo7DXf7BziBeaEearrqYmoKhM0vgbCRYY5yUSgmC650tJvf4cFTNtO79FM81NzM19s6eLi5lfta25C2AHuiTVzV20dldfwyKh4Np8tDr/TyhrGAwjil6D3imnYbDTKfg0YBJSVm6o7iLDdCQME4JgU7FlK6idg/qcirS+wr/pEd3Tu4yfcqn03PYEvXboqsVUc8RFi/0FyWatGMPIx557F0/wu8paXztz2/pVDXaHU4CAz9+sueiQP4ZE8vT5y+Cdzl8Me7mB7ReLXvIIY0sIkjvy8DkV6ElASMdBVDV0w6HPmz+PR6IDCD17e9zUa7+TRaHbTxeEMzO9NcrPfbycpKzixfu8PBJzPvZ3u7jbMTaKY/0i7ChsF9Vkv8vAVFpDlsOO2JbTOndAu9P+SiG168HjdLVq5mnjXKpM8IMzNqGrpj+gqWTB/s2bdVrmOulS73oNDY6DP/TmSYoQ/mhth01klw+q18sPweKrQoBpEjev/7CUR8eKUkiEcZumLy4s2lZc13Bl7uP/tHLEwv5bIF15B1ZczlERJCUVkFGo5xW9DiWNiVfy7bs88ZeL1+YQnfvHRRwq7fT8q20HvCPdz4xxsBiBjpZs99yWIKdR1h2JA2g3LpYVPkLn5x+dXDT555KqsDg0uhrfUH
2J6WRnWocvCYtAyYvxGqLxz4Fp6/YDE9O83hi9sa97Kh6sgMb92RXrJ0Az9uFXJRTGquX7sYyp4AdxZLKk6DMy8msWNaRmZOgTmiJJENpj/cegYyYVeLTcq20Gu6awa2RWQa3jQHFFRjA7KiZmwt18jiYPoi0jLzh5+cno/NPo3rG6exzh9gXiTKI82t5PlnDT/uskdh8eWDr3NmMStq5mb5a927I+pqjXRToOuEhAenfSJ8/BWK42DeBVBxWrJVDKN/kZhARE/YNW02gT0BHbCj6ki2gHjRH275cYeOX2aR4XKAOwuZUUyJNVfIGS2KuQhxKLuSW8Pv8/3Wdvq/5x/Rzj36RbPKKZI2siNp7Oh4c8RD2iJ9FOo6hjM94WN0FYqpwMYlpWxcUspnzpqbbCkJJ2UNvX+KfU64hw45DW+aacsiv5KlUR8ZhkGHvzjmDK6smYOTJy4O38OJoZ/QFBolQmWzIwrmcUpQ0BzZecTiuwBtmp98TQeHyqyoUMSD9DQH912xNGZjLZVJXUO3JhVla0G6bdmDvc35lfxrTz3P1jexO5jP6sr8Ec+35Znf7mHp4F05mz68nFV9DNOIy5Zyna8Wic7P/v694ZqiAfpklFxD4E5gSlGFQjE1SFlD7w+5eKVEcw8x7fwq0iSU6DqNopB1sZL3zF0LxYu4JHIPBjZe++JZ/PAfl41+4dKlzItE+Vifj0f3/3bYikn9mR4zjTS8alKRQqEYZ1LWVQYM3TCQ6YMJ+5l7DnA7ACUV88hNjzE1uPAEuPFVvrC7lYbuIOU5x5hcp9TMA7MmEOS5zAzqeg6ysMCcNdcWNNMNSPJxqxEuCoVinEnZFnogGsAlHDgB17QhoZL8uXzNezvPaGfwD6eOPpvtrHmFXLXqI+RhKF7M/up/oVQzR7u811ILQIu/hWv/71oAerUi8mN9kSgUCsUYSRlD1w2dNxre4PWG182FmLUA6cJsBTvzhhvyaRuu43dzvsLaeKTWtNkovvSb5OqmYdd0mUmMHnrvIQCydZ0DvaUsS0LiIoVCkdqkTMjlxboXuePVOwDYMHsDAB49yntGBVn5pcOOXTe/iHVxXFHe63JgT88jzRAc7D2EL+Lj2b3PsqnoFO7621NcYczgEmXoCoVinBm1hS6EeFgI0SqEeP+w8s8KIT4UQuwUQnw7fhKPjQYrD/OllZfywv4X2N3+PunREH8ylo0tVehxYs8oJD8qaAk0UdtTS9SIcurOlwlJJ3tlOYtHWERAoVAojodjCbk8AqwfWiCEOAvYCCySUi4A/mv8pX00Nu/dh9TdfKJLwyYFNb11pBuSbUYVJckw9MwCSjSDjnAzB/rMFLyzQ71cH/08d25arab9KxSKcWdUQ5dSvgJ0HlZ8E/BNKWXYOqY1Dto+Egd7W5FaBsU1LzM7bE75TTcMumTGwFTgRCK8+czWIvTpzezv3o8ACjVIr1rDpcvLE65HoVCkPmPtFK0CVgshtggh/iqEWDmeosaCIfow9AzcvQeo0My5/V4p6SGdvIwkjChJL2Bp2Ickyku1fyQ7aqNBL2H5rILRz1UoFIoxMFZDdwA5wCrgC8DTIkZiEiHEDUKIrUKIrW1tbWO83Ohoopc0LQ27HmRuNGSKlBLcOQnPSQxA9nQWhU0dB321zIxq1Miy+K5CrlAopjRjdbp64Flp8hZgACPOoZdSPiilXCGlXFFQEL/WqUYvWVZytRlRs4XeZbeTlp4Vt2selcVXometGni5MOpn/ZozmFuYcZSTFAqFYuyM1dB/A5wNIISoAlxA+3iJ+qhE9Ai6CJBvrUA0w5rU02Z3kJeZpAQ9Tg/Rkz7NpX0+yoNp3Njdg2PmycnRolAopgSjjkMXQjwBrAHyhRD1wF3Aw8DD1lDGCHCNlDJp+d07Q2afbaluhjjKrZzkhVEbzqzEruk3lIola/nC7/2ki078tkyYdWbStCgUitRnVEOXUl4ZY9dV46xlzHS0mkPk18i9RHGSZ0R5
uKmFaKiUl8qSN8Xelebm1dyNrO16mlD1xaTbnUnTolAoUp+UmCnqb9wOwAzZyx57JQv0D1gZCvOKnkFeRvJa6ABrb3kIeu4iz5uXVB0KhSL1SYlcLiGH2Qr3GJIdxiyi0py0k7Qhi4eTVQ7OqZdsX6FQJJaUMPRA1Mw57paSDyKFtGOObOmWGRQkuYWuUCgUiSIlDD0UMXOfe6RBnSzm97o5miRL+Dl17sgrEikUCkWqkRKGHrQWs3Abkr1GGQ9qFwJQdOI6MtRSbwqFYoqQEm4X0oIAXB36Ci3kYrcJ5J1NnKzi1gqFYgoxKVroz+97nnu33Btzf7+h7zFmAZDusiNcXhg5G4FCoVCkJJPC0H+zcytP7H4a3dBH3B/Uw6QZkijmOG8VZlEoFFORSWHoQitAotHoax5xf1AP4ZYSl90crtjYE0qkPIVCoZgQTApDn5Ntrgn6bkvNiPtDWpg0CdevnsWC0mlsWFSSSHkKhUIxIZgUsYmFhXPhEHxtyxfJcH+LM6cPz4kSMCK4DDN2/sJnT0+SSoVCoUguk6KFfmLxdACCup8vvPKFI/YHjSguKXA77QghiJGaXaFQKFKaSWHoZTnege1C95E5UYKGhtMQpDnVOp0KhWLqMikM3bnvZdZ3VgDQ19d4xP6QoeGUNtyOSfHvKBQKRVyYHA548E2+0/MKt3R20ykMAtHAsN0hdOyGDbdqoSsUiinM5DD0FdcBUGqtRNToG95KD0oDu7QrQ1coFFOayWHo2TOQy/+Zsn5D9w839JCQ2A07HmXoCoViCjM5DB0QF/2AFpeZRbHB1zBsXwiJMBykpylDVygUU5dJY+gAeZ48XIakvnfQ0KWUhAQI6SRdTflXKBRTmEll6N5p+ZRqGns7DwyUaYaGLgQYTrwu1UJXKBRTl0ll6NOy8ynTNOp7Dw2U9YS6ALAZLtJdqoWuUCimLpPK0HPzCyjTNNpDg0m6ajs/BMATycCrYugKhWIKM6kM3ZOZR5mmEZR+/NYqRbWduwFwRHNx2SfVv6NQKBTjyqgOKIR4WAjRKoR4f4R9nxdCSCFEYhbudGdRqpk50ev76jGkwf7O3XgNA40SlcNFoVBMaY4l6PwI8D/Ao0MLhRDTgXOAg+MvKwbuLBaEIwhpY9PzmyhNL8WuR5gVjRJ0HpnjRaFQKKYSo7bQpZSvAJ0j7Po+8O+AHG9RMXFnM13T+FprN2lk0ehv5FConTmRKAFXbsJkKBQKxURkTEFnIcTHgAYp5Y5jOPYGIcRWIcTWtra2sVxukHQzsnNJoJvz9yxnY8G3+HTGcm7p6iHiTkzUR6FQKCYqH9nQhRBe4EvAV4/leCnlg1LKFVLKFQUFBR/1csNxZ8HN24h6Clhj28GcNx7hpvd+TabhxJaWcXx/W6FQKCY5Y2mhzwFmATuEEHVAObBdCFE8nsJikj8X58KLOdO5i4/b/wpAh5GhFoZWKBRTno9s6FLK96SUhVLKCillBVAPLJNSjryCczyoPh+HESJDmItBF9CNV00qUigUU5xjGbb4BPAmUC2EqBdCXB9/WaMw64xhL9OEphJzKRSKKc+ozVop5ZWj7K8YNzXHit0JlefC3j8QsXl4LHImTjWpSKFQTHEmb5zi8scg7OOlmjD3PPE267qDyVakUCgUSWXyGrojDRxpzC3sBcAX1pIsSKFQKJLL5DV0i3nFmdy2roqNS0qTLUWhUCiSyqQ3dCEEt6yrTLYMhUKhSDqqJ1GhUChSBGXoCoVCkSIoQ1coFIoUQRm6QqFQpAjK0BUKhSJFUIauUCgUKYIydIVCoUgRlKErFApFiiCkTNwKckKINuBAwi5okg+0J/iaY0HpHF+UzvFjMmiE1NY5U0o56gpBCTX0ZCCE2CqlXJFsHaOhdI4vSuf4MRk0gtIJKuSiUCgUKYMydIVCoUgRpoKhP5hsAceI0jm+KJ3jx2TQCEpn6sfQFQqFYqowFVroCoVCMSWY9IYuhJguhNgshNglhNgp
hLjFKs8VQrwshNhr/c6xyoUQ4r+FEDVCiHeFEMuSqPFuIUSDEOId6+eCIefcYWn8UAhxXrw1Wtd0CyHeEkLssHR+zSqfJYTYYtXlU0IIl1WeZr2usfZXJFnnI0KI2iH1ucQqT/h7fpheuxDibSHEC9brCVWfMTRO1LqsE0K8Z2naapVNmHv9KBoTc69LKSf1D1ACLLO2M4E9wHzg28DtVvntwLes7QuAFwEBrAK2JFHj3cDnRzh+PrADSANmAfsAewJ0CiDD2nYCW6w6ehq4wir/EXCTtf1p4EfW9hXAUwl6z2PpfATYNMLxCX/PD7v+vwKPAy9YrydUfcbQOFHrsg7IP6xswtzrR9GYkHt90rfQpZRNUsrt1nYfsAsoAzYC/2sd9r/Axdb2RuBRafI3IFsIUZIkjbHYCDwppQxLKWuBGuCkeGq0tEkppc966bR+JHA28Eur/PC67K/jXwJrhRAiiTpjkfD3vB8hRDlwIfAT67VggtXn4RpHIWl1OYqmCXGvj4FxvdcnvaEPxXpEXYrZYiuSUjaBaahAoXVYGXBoyGn1HN1c46kR4GbrcfDh/kfFZGq0Hr3fAVqBlzFbDN1Syv5VuIdqGdBp7e8B8pKhU0rZX5/fsOrz+0KItMN1WiTyPf8B8O+AYb3OY+LV5+Ea+5lodQnmF/cfhBDbhBA3WGUT7V4fSSMk4F5PGUMXQmQAvwJulVL2Hu3QEcoSMtRnBI0PAHOAJUAT8N1ka5RS6lLKJUA5ZkvhhKNomTA6hRALgTuAecBKIBf4YjJ1CiE2AK1Sym1Di4+iJeE6Y2iECVaXQzhNSrkMOB/4jBDijKMcmyytI2lMyL2eEoYuhHBiGuUvpJTPWsUt/Y9X1u9Wq7wemD7k9HKgMRkapZQtljEZwEMMPmolReNQpJTdwF8wY4/ZQoj+BcWHahnQae3PAjqTpHO9FdqSUsow8DOSX5+nAR8TQtQBT2KGWn7AxKrPIzQKIR6bgHUJgJSy0frdCvza0jWh7vWRDL5TVgAAAW9JREFUNCbqXp/0hm7FGH8K7JJSfm/IrueAa6zta4DfDim/2uoBXwX09D+uJVrjYfG8S4D3h2i8whr1MAuoBN6Kp0ZLT4EQItva9gDrMOP9m4FN1mGH12V/HW8C/iytnp4k6Nw95KYWmHHUofWZ0PccQEp5h5SyXEpZgdnJ+Wcp5T8xgeozhsarJlpdWlrShRCZ/dvAuZauiXSvj6gxYff6WHtTJ8oPcDrmI8q7wDvWzwWYscc/AXut37nW8QL4IWZs+D1gRRI1/tzS8K71xpYMOedLlsYPgfMTVJeLgLctPe8DX7XKZ1sfshrgGSDNKndbr2us/bOTrPPPVn2+DzzG4EiYhL/nI2hew+AIkglVnzE0Tri6tOpth/WzE/iSVT6R7vVYGhNyr6uZogqFQpEiTPqQi0KhUChMlKErFApFiqAMXaFQKFIEZegKhUKRIihDVygUihRBGbpCoVCkCMrQFQqFIkVQhq5QKBQpwv8D3rYk8yKUjCgAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x23f6ac9e9b0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "ht[['open','close','high']].plot()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Getting data:]#########"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>price</th>\n",
       "      <th>pchange</th>\n",
       "      <th>change</th>\n",
       "      <th>volume</th>\n",
       "      <th>amount</th>\n",
       "      <th>type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>09:59:36</td>\n",
       "      <td>10.01</td>\n",
       "      <td>+2.04</td>\n",
       "      <td>0.00</td>\n",
       "      <td>12</td>\n",
       "      <td>12012</td>\n",
       "      <td>卖盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>09:59:33</td>\n",
       "      <td>10.01</td>\n",
       "      <td>+2.04</td>\n",
       "      <td>0.00</td>\n",
       "      <td>6</td>\n",
       "      <td>6006</td>\n",
       "      <td>卖盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>09:59:30</td>\n",
       "      <td>10.01</td>\n",
       "      <td>+2.04</td>\n",
       "      <td>0.00</td>\n",
       "      <td>6</td>\n",
       "      <td>6006</td>\n",
       "      <td>卖盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>09:59:27</td>\n",
       "      <td>10.01</td>\n",
       "      <td>+2.04</td>\n",
       "      <td>-0.01</td>\n",
       "      <td>89</td>\n",
       "      <td>89089</td>\n",
       "      <td>卖盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>09:59:24</td>\n",
       "      <td>10.02</td>\n",
       "      <td>+2.14</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11</td>\n",
       "      <td>11022</td>\n",
       "      <td>卖盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>09:59:21</td>\n",
       "      <td>10.02</td>\n",
       "      <td>+2.14</td>\n",
       "      <td>0.00</td>\n",
       "      <td>18</td>\n",
       "      <td>18036</td>\n",
       "      <td>买盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>09:59:18</td>\n",
       "      <td>10.02</td>\n",
       "      <td>+2.14</td>\n",
       "      <td>0.00</td>\n",
       "      <td>129</td>\n",
       "      <td>129258</td>\n",
       "      <td>买盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>09:59:15</td>\n",
       "      <td>10.02</td>\n",
       "      <td>+2.14</td>\n",
       "      <td>0.00</td>\n",
       "      <td>29</td>\n",
       "      <td>29058</td>\n",
       "      <td>买盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>09:59:12</td>\n",
       "      <td>10.02</td>\n",
       "      <td>+2.14</td>\n",
       "      <td>0.00</td>\n",
       "      <td>13</td>\n",
       "      <td>13026</td>\n",
       "      <td>买盘</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>09:59:09</td>\n",
       "      <td>10.02</td>\n",
       "      <td>+2.14</td>\n",
       "      <td>0.01</td>\n",
       "      <td>32</td>\n",
       "      <td>32064</td>\n",
       "      <td>买盘</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       time  price pchange  change  volume  amount type\n",
       "0  09:59:36  10.01   +2.04    0.00      12   12012   卖盘\n",
       "1  09:59:33  10.01   +2.04    0.00       6    6006   卖盘\n",
       "2  09:59:30  10.01   +2.04    0.00       6    6006   卖盘\n",
       "3  09:59:27  10.01   +2.04   -0.01      89   89089   卖盘\n",
       "4  09:59:24  10.02   +2.14    0.00      11   11022   卖盘\n",
       "5  09:59:21  10.02   +2.14    0.00      18   18036   买盘\n",
       "6  09:59:18  10.02   +2.14    0.00     129  129258   买盘\n",
       "7  09:59:15  10.02   +2.14    0.00      29   29058   买盘\n",
       "8  09:59:12  10.02   +2.14    0.00      13   13026   买盘\n",
       "9  09:59:09  10.02   +2.14    0.01      32   32064   买盘"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = ts.get_today_ticks('300274')            #当日的历史分笔数据\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      code  name      time  price  volume  preprice type\n",
      "0   300274  阳光电源  09:58:45  10.02   45500     10.01   买盘\n",
      "1   300274  阳光电源  09:56:12  10.01   44700     10.02   卖盘\n",
      "2   300274  阳光电源  09:52:30  10.05   44300     10.05   卖盘\n",
      "3   300274  阳光电源  09:52:00  10.07   88500     10.08   卖盘\n",
      "4   300274  阳光电源  09:50:30  10.06  124500     10.06   买盘\n",
      "5   300274  阳光电源  09:49:27  10.04   48700     10.03   买盘\n",
      "6   300274  阳光电源  09:48:48  10.03   53200     10.04   卖盘\n",
      "7   300274  阳光电源  09:45:51  10.05  184300     10.04   买盘\n",
      "8   300274  阳光电源  09:45:06  10.01   48600     10.00   买盘\n",
      "9   300274  阳光电源  09:43:33   9.97   40000      9.99   卖盘\n",
      "10  300274  阳光电源  09:42:36   9.97   40000      9.97   卖盘\n",
      "11  300274  阳光电源  09:40:42   9.95  161200      9.98   卖盘\n",
      "12  300274  阳光电源  09:37:27  10.03   52800     10.02   买盘\n",
      "13  300274  阳光电源  09:37:21  10.02   42500     10.03   卖盘\n",
      "14  300274  阳光电源  09:36:03  10.03   53400     10.02   买盘\n",
      "15  300274  阳光电源  09:35:39  10.01   65000     10.01   买盘\n",
      "16  300274  阳光电源  09:35:12  10.00   49200     10.00   买盘\n",
      "17  300274  阳光电源  09:35:00  10.00   44100     10.00   卖盘\n",
      "18  300274  阳光电源  09:34:57  10.00   98400     10.01   卖盘\n",
      "19  300274  阳光电源  09:34:36  10.01   55000     10.01   卖盘\n",
      "20  300274  阳光电源  09:33:54  10.03   64500     10.04   卖盘\n",
      "21  300274  阳光电源  09:33:36  10.04   46000     10.04   卖盘\n",
      "22  300274  阳光电源  09:32:12  10.03  141800     10.01   买盘\n",
      "23  300274  阳光电源  09:31:36  10.02   81200     10.02   买盘\n",
      "24  300274  阳光电源  09:31:00  10.07   97700     10.06   买盘\n",
      "25  300274  阳光电源  09:30:57  10.06   77800     10.07   卖盘\n",
      "26  300274  阳光电源  09:30:39  10.09   79200     10.09   卖盘\n",
      "27  300274  阳光电源  09:30:30  10.10   53800     10.08   买盘\n",
      "28  300274  阳光电源  09:30:06  10.08   99600     10.09   卖盘\n",
      "29  300274  阳光电源  09:30:03  10.09  109100     10.07   买盘\n",
      "30  300274  阳光电源  09:25:03  10.07  207100      0.00   买盘\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "卖盘    16\n",
       "买盘    15\n",
       "Name: type, dtype: int64"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "htbig= ts.get_sina_dd('300274', date='2020-04-7', vol=400)  #指定大于等于500手的数据\n",
    "print(htbig)\n",
    "htbig['type'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'list'>\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "data=pd.Series([1,2,3,4])\n",
    "data=list(data)\n",
    "data.reverse()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
